1c1d7c514SDavid Sterba // SPDX-License-Identifier: GPL-2.0 253b381b3SDavid Woodhouse /* 353b381b3SDavid Woodhouse * Copyright (C) 2012 Fusion-io All rights reserved. 453b381b3SDavid Woodhouse * Copyright (C) 2012 Intel Corp. All rights reserved. 553b381b3SDavid Woodhouse */ 6c1d7c514SDavid Sterba 753b381b3SDavid Woodhouse #include <linux/sched.h> 853b381b3SDavid Woodhouse #include <linux/bio.h> 953b381b3SDavid Woodhouse #include <linux/slab.h> 1053b381b3SDavid Woodhouse #include <linux/blkdev.h> 1153b381b3SDavid Woodhouse #include <linux/raid/pq.h> 1253b381b3SDavid Woodhouse #include <linux/hash.h> 1353b381b3SDavid Woodhouse #include <linux/list_sort.h> 1453b381b3SDavid Woodhouse #include <linux/raid/xor.h> 15818e010bSDavid Sterba #include <linux/mm.h> 1653b381b3SDavid Woodhouse #include "ctree.h" 1753b381b3SDavid Woodhouse #include "disk-io.h" 1853b381b3SDavid Woodhouse #include "volumes.h" 1953b381b3SDavid Woodhouse #include "raid56.h" 2053b381b3SDavid Woodhouse #include "async-thread.h" 2153b381b3SDavid Woodhouse 2253b381b3SDavid Woodhouse /* set when additional merges to this rbio are not allowed */ 2353b381b3SDavid Woodhouse #define RBIO_RMW_LOCKED_BIT 1 2453b381b3SDavid Woodhouse 254ae10b3aSChris Mason /* 264ae10b3aSChris Mason * set when this rbio is sitting in the hash, but it is just a cache 274ae10b3aSChris Mason * of past RMW 284ae10b3aSChris Mason */ 294ae10b3aSChris Mason #define RBIO_CACHE_BIT 2 304ae10b3aSChris Mason 314ae10b3aSChris Mason /* 324ae10b3aSChris Mason * set when it is safe to trust the stripe_pages for caching 334ae10b3aSChris Mason */ 344ae10b3aSChris Mason #define RBIO_CACHE_READY_BIT 3 354ae10b3aSChris Mason 364ae10b3aSChris Mason #define RBIO_CACHE_SIZE 1024 374ae10b3aSChris Mason 388a953348SDavid Sterba #define BTRFS_STRIPE_HASH_TABLE_BITS 11 398a953348SDavid Sterba 408a953348SDavid Sterba /* Used by the raid56 code to lock stripes for read/modify/write */ 418a953348SDavid Sterba struct btrfs_stripe_hash { 428a953348SDavid Sterba struct list_head hash_list; 438a953348SDavid Sterba spinlock_t lock; 448a953348SDavid Sterba }; 458a953348SDavid Sterba 468a953348SDavid Sterba /* Used by the raid56 code to lock stripes for read/modify/write */ 478a953348SDavid Sterba struct btrfs_stripe_hash_table { 488a953348SDavid Sterba struct list_head stripe_cache; 498a953348SDavid Sterba spinlock_t cache_lock; 508a953348SDavid Sterba int cache_size; 518a953348SDavid Sterba struct btrfs_stripe_hash table[]; 528a953348SDavid Sterba }; 538a953348SDavid Sterba 541b94b556SMiao Xie enum btrfs_rbio_ops { 55b4ee1782SOmar Sandoval BTRFS_RBIO_WRITE, 56b4ee1782SOmar Sandoval BTRFS_RBIO_READ_REBUILD, 57b4ee1782SOmar Sandoval BTRFS_RBIO_PARITY_SCRUB, 58b4ee1782SOmar Sandoval BTRFS_RBIO_REBUILD_MISSING, 591b94b556SMiao Xie }; 601b94b556SMiao Xie 6153b381b3SDavid Woodhouse struct btrfs_raid_bio { 6253b381b3SDavid Woodhouse struct btrfs_fs_info *fs_info; 6353b381b3SDavid Woodhouse struct btrfs_bio *bbio; 6453b381b3SDavid Woodhouse 6553b381b3SDavid Woodhouse /* while we're doing rmw on a stripe 6653b381b3SDavid Woodhouse * we put it into a hash table so we can 6753b381b3SDavid Woodhouse * lock the stripe and merge more rbios 6853b381b3SDavid Woodhouse * into it. 
6953b381b3SDavid Woodhouse */ 7053b381b3SDavid Woodhouse struct list_head hash_list; 7153b381b3SDavid Woodhouse 7253b381b3SDavid Woodhouse /* 734ae10b3aSChris Mason * LRU list for the stripe cache 744ae10b3aSChris Mason */ 754ae10b3aSChris Mason struct list_head stripe_cache; 764ae10b3aSChris Mason 774ae10b3aSChris Mason /* 7853b381b3SDavid Woodhouse * for scheduling work in the helper threads 7953b381b3SDavid Woodhouse */ 8053b381b3SDavid Woodhouse struct btrfs_work work; 8153b381b3SDavid Woodhouse 8253b381b3SDavid Woodhouse /* 8353b381b3SDavid Woodhouse * bio list and bio_list_lock are used 8453b381b3SDavid Woodhouse * to add more bios into the stripe 8553b381b3SDavid Woodhouse * in hopes of avoiding the full rmw 8653b381b3SDavid Woodhouse */ 8753b381b3SDavid Woodhouse struct bio_list bio_list; 8853b381b3SDavid Woodhouse spinlock_t bio_list_lock; 8953b381b3SDavid Woodhouse 906ac0f488SChris Mason /* also protected by the bio_list_lock, the 916ac0f488SChris Mason * plug list is used by the plugging code 926ac0f488SChris Mason * to collect partial bios while plugged. The 936ac0f488SChris Mason * stripe locking code also uses it to hand off 9453b381b3SDavid Woodhouse * the stripe lock to the next pending IO 9553b381b3SDavid Woodhouse */ 9653b381b3SDavid Woodhouse struct list_head plug_list; 9753b381b3SDavid Woodhouse 9853b381b3SDavid Woodhouse /* 9953b381b3SDavid Woodhouse * flags that tell us if it is safe to 10053b381b3SDavid Woodhouse * merge with this bio 10153b381b3SDavid Woodhouse */ 10253b381b3SDavid Woodhouse unsigned long flags; 10353b381b3SDavid Woodhouse 10453b381b3SDavid Woodhouse /* size of each individual stripe on disk */ 10553b381b3SDavid Woodhouse int stripe_len; 10653b381b3SDavid Woodhouse 10753b381b3SDavid Woodhouse /* number of data stripes (no p/q) */ 10853b381b3SDavid Woodhouse int nr_data; 10953b381b3SDavid Woodhouse 1102c8cdd6eSMiao Xie int real_stripes; 1112c8cdd6eSMiao Xie 1125a6ac9eaSMiao Xie int stripe_npages; 11353b381b3SDavid Woodhouse /* 11453b381b3SDavid Woodhouse * set if we're doing a parity rebuild 11553b381b3SDavid Woodhouse * for a read from higher up, which is handled 11653b381b3SDavid Woodhouse * differently from a parity rebuild as part of 11753b381b3SDavid Woodhouse * rmw 11853b381b3SDavid Woodhouse */ 1191b94b556SMiao Xie enum btrfs_rbio_ops operation; 12053b381b3SDavid Woodhouse 12153b381b3SDavid Woodhouse /* first bad stripe */ 12253b381b3SDavid Woodhouse int faila; 12353b381b3SDavid Woodhouse 12453b381b3SDavid Woodhouse /* second bad stripe (for raid6 use) */ 12553b381b3SDavid Woodhouse int failb; 12653b381b3SDavid Woodhouse 1275a6ac9eaSMiao Xie int scrubp; 12853b381b3SDavid Woodhouse /* 12953b381b3SDavid Woodhouse * number of pages needed to represent the full 13053b381b3SDavid Woodhouse * stripe 13153b381b3SDavid Woodhouse */ 13253b381b3SDavid Woodhouse int nr_pages; 13353b381b3SDavid Woodhouse 13453b381b3SDavid Woodhouse /* 13553b381b3SDavid Woodhouse * size of all the bios in the bio_list. This 13653b381b3SDavid Woodhouse * helps us decide if the rbio maps to a full 13753b381b3SDavid Woodhouse * stripe or not 13853b381b3SDavid Woodhouse */ 13953b381b3SDavid Woodhouse int bio_list_bytes; 14053b381b3SDavid Woodhouse 1414245215dSMiao Xie int generic_bio_cnt; 1424245215dSMiao Xie 143dec95574SElena Reshetova refcount_t refs; 14453b381b3SDavid Woodhouse 145b89e1b01SMiao Xie atomic_t stripes_pending; 146b89e1b01SMiao Xie 147b89e1b01SMiao Xie atomic_t error; 14853b381b3SDavid Woodhouse /* 14953b381b3SDavid Woodhouse * these are two arrays of pointers. 
We allocate the 15053b381b3SDavid Woodhouse * rbio big enough to hold them both and setup their 15153b381b3SDavid Woodhouse * locations when the rbio is allocated 15253b381b3SDavid Woodhouse */ 15353b381b3SDavid Woodhouse 15453b381b3SDavid Woodhouse /* pointers to pages that we allocated for 15553b381b3SDavid Woodhouse * reading/writing stripes directly from the disk (including P/Q) 15653b381b3SDavid Woodhouse */ 15753b381b3SDavid Woodhouse struct page **stripe_pages; 15853b381b3SDavid Woodhouse 15953b381b3SDavid Woodhouse /* 16053b381b3SDavid Woodhouse * pointers to the pages in the bio_list. Stored 16153b381b3SDavid Woodhouse * here for faster lookup 16253b381b3SDavid Woodhouse */ 16353b381b3SDavid Woodhouse struct page **bio_pages; 1645a6ac9eaSMiao Xie 1655a6ac9eaSMiao Xie /* 1665a6ac9eaSMiao Xie * bitmap to record which horizontal stripe has data 1675a6ac9eaSMiao Xie */ 1685a6ac9eaSMiao Xie unsigned long *dbitmap; 1691389053eSKees Cook 1701389053eSKees Cook /* allocated with real_stripes-many pointers for finish_*() calls */ 1711389053eSKees Cook void **finish_pointers; 1721389053eSKees Cook 1731389053eSKees Cook /* allocated with stripe_npages-many bits for finish_*() calls */ 1741389053eSKees Cook unsigned long *finish_pbitmap; 17553b381b3SDavid Woodhouse }; 17653b381b3SDavid Woodhouse 17753b381b3SDavid Woodhouse static int __raid56_parity_recover(struct btrfs_raid_bio *rbio); 17853b381b3SDavid Woodhouse static noinline void finish_rmw(struct btrfs_raid_bio *rbio); 17953b381b3SDavid Woodhouse static void rmw_work(struct btrfs_work *work); 18053b381b3SDavid Woodhouse static void read_rebuild_work(struct btrfs_work *work); 18153b381b3SDavid Woodhouse static int fail_bio_stripe(struct btrfs_raid_bio *rbio, struct bio *bio); 18253b381b3SDavid Woodhouse static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed); 18353b381b3SDavid Woodhouse static void __free_raid_bio(struct btrfs_raid_bio *rbio); 18453b381b3SDavid Woodhouse static void index_rbio_pages(struct btrfs_raid_bio *rbio); 18553b381b3SDavid Woodhouse static int alloc_rbio_pages(struct btrfs_raid_bio *rbio); 18653b381b3SDavid Woodhouse 1875a6ac9eaSMiao Xie static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, 1885a6ac9eaSMiao Xie int need_check); 189a81b747dSDavid Sterba static void scrub_parity_work(struct btrfs_work *work); 1905a6ac9eaSMiao Xie 191ac638859SDavid Sterba static void start_async_work(struct btrfs_raid_bio *rbio, btrfs_func_t work_func) 192ac638859SDavid Sterba { 193a0cac0ecSOmar Sandoval btrfs_init_work(&rbio->work, work_func, NULL, NULL); 194ac638859SDavid Sterba btrfs_queue_work(rbio->fs_info->rmw_workers, &rbio->work); 195ac638859SDavid Sterba } 196ac638859SDavid Sterba 19753b381b3SDavid Woodhouse /* 19853b381b3SDavid Woodhouse * the stripe hash table is used for locking, and to collect 19953b381b3SDavid Woodhouse * bios in hopes of making a full stripe 20053b381b3SDavid Woodhouse */ 20153b381b3SDavid Woodhouse int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info) 20253b381b3SDavid Woodhouse { 20353b381b3SDavid Woodhouse struct btrfs_stripe_hash_table *table; 20453b381b3SDavid Woodhouse struct btrfs_stripe_hash_table *x; 20553b381b3SDavid Woodhouse struct btrfs_stripe_hash *cur; 20653b381b3SDavid Woodhouse struct btrfs_stripe_hash *h; 20753b381b3SDavid Woodhouse int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS; 20853b381b3SDavid Woodhouse int i; 20953b381b3SDavid Woodhouse 21053b381b3SDavid Woodhouse if (info->stripe_hash_table) 21153b381b3SDavid Woodhouse return 0; 
21253b381b3SDavid Woodhouse 21383c8266aSDavid Sterba /* 21483c8266aSDavid Sterba * The table is large, starting with order 4 and can go as high as 21583c8266aSDavid Sterba * order 7 in case lock debugging is turned on. 21683c8266aSDavid Sterba * 21783c8266aSDavid Sterba * Try harder to allocate and fallback to vmalloc to lower the chance 21883c8266aSDavid Sterba * of a failing mount. 21983c8266aSDavid Sterba */ 220ee787f95SDavid Sterba table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL); 22153b381b3SDavid Woodhouse if (!table) 22253b381b3SDavid Woodhouse return -ENOMEM; 22353b381b3SDavid Woodhouse 2244ae10b3aSChris Mason spin_lock_init(&table->cache_lock); 2254ae10b3aSChris Mason INIT_LIST_HEAD(&table->stripe_cache); 2264ae10b3aSChris Mason 22753b381b3SDavid Woodhouse h = table->table; 22853b381b3SDavid Woodhouse 22953b381b3SDavid Woodhouse for (i = 0; i < num_entries; i++) { 23053b381b3SDavid Woodhouse cur = h + i; 23153b381b3SDavid Woodhouse INIT_LIST_HEAD(&cur->hash_list); 23253b381b3SDavid Woodhouse spin_lock_init(&cur->lock); 23353b381b3SDavid Woodhouse } 23453b381b3SDavid Woodhouse 23553b381b3SDavid Woodhouse x = cmpxchg(&info->stripe_hash_table, NULL, table); 236f749303bSWang Shilong if (x) 237f749303bSWang Shilong kvfree(x); 23853b381b3SDavid Woodhouse return 0; 23953b381b3SDavid Woodhouse } 24053b381b3SDavid Woodhouse 24153b381b3SDavid Woodhouse /* 2424ae10b3aSChris Mason * caching an rbio means to copy anything from the 2434ae10b3aSChris Mason * bio_pages array into the stripe_pages array. We 2444ae10b3aSChris Mason * use the page uptodate bit in the stripe cache array 2454ae10b3aSChris Mason * to indicate if it has valid data 2464ae10b3aSChris Mason * 2474ae10b3aSChris Mason * once the caching is done, we set the cache ready 2484ae10b3aSChris Mason * bit. 2494ae10b3aSChris Mason */ 2504ae10b3aSChris Mason static void cache_rbio_pages(struct btrfs_raid_bio *rbio) 2514ae10b3aSChris Mason { 2524ae10b3aSChris Mason int i; 2534ae10b3aSChris Mason char *s; 2544ae10b3aSChris Mason char *d; 2554ae10b3aSChris Mason int ret; 2564ae10b3aSChris Mason 2574ae10b3aSChris Mason ret = alloc_rbio_pages(rbio); 2584ae10b3aSChris Mason if (ret) 2594ae10b3aSChris Mason return; 2604ae10b3aSChris Mason 2614ae10b3aSChris Mason for (i = 0; i < rbio->nr_pages; i++) { 2624ae10b3aSChris Mason if (!rbio->bio_pages[i]) 2634ae10b3aSChris Mason continue; 2644ae10b3aSChris Mason 2654ae10b3aSChris Mason s = kmap(rbio->bio_pages[i]); 2664ae10b3aSChris Mason d = kmap(rbio->stripe_pages[i]); 2674ae10b3aSChris Mason 26869d24804SDavid Sterba copy_page(d, s); 2694ae10b3aSChris Mason 2704ae10b3aSChris Mason kunmap(rbio->bio_pages[i]); 2714ae10b3aSChris Mason kunmap(rbio->stripe_pages[i]); 2724ae10b3aSChris Mason SetPageUptodate(rbio->stripe_pages[i]); 2734ae10b3aSChris Mason } 2744ae10b3aSChris Mason set_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 2754ae10b3aSChris Mason } 2764ae10b3aSChris Mason 2774ae10b3aSChris Mason /* 27853b381b3SDavid Woodhouse * we hash on the first logical address of the stripe 27953b381b3SDavid Woodhouse */ 28053b381b3SDavid Woodhouse static int rbio_bucket(struct btrfs_raid_bio *rbio) 28153b381b3SDavid Woodhouse { 2828e5cfb55SZhao Lei u64 num = rbio->bbio->raid_map[0]; 28353b381b3SDavid Woodhouse 28453b381b3SDavid Woodhouse /* 28553b381b3SDavid Woodhouse * we shift down quite a bit. We're using byte 28653b381b3SDavid Woodhouse * addressing, and most of the lower bits are zeros. 
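 * (For example, with the usual 64KiB stripe length the full stripe
 * logically starts on a 64KiB boundary, so the low 16 bits of
 * rbio->bbio->raid_map[0] carry almost no information.)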
28753b381b3SDavid Woodhouse * This tends to upset hash_64, and it consistently 28853b381b3SDavid Woodhouse * returns just one or two different values. 28953b381b3SDavid Woodhouse * 29053b381b3SDavid Woodhouse * shifting off the lower bits fixes things. 29153b381b3SDavid Woodhouse */ 29253b381b3SDavid Woodhouse return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS); 29353b381b3SDavid Woodhouse } 29453b381b3SDavid Woodhouse 29553b381b3SDavid Woodhouse /* 2964ae10b3aSChris Mason * stealing an rbio means taking all the uptodate pages from the stripe 2974ae10b3aSChris Mason * array in the source rbio and putting them into the destination rbio 2984ae10b3aSChris Mason */ 2994ae10b3aSChris Mason static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest) 3004ae10b3aSChris Mason { 3014ae10b3aSChris Mason int i; 3024ae10b3aSChris Mason struct page *s; 3034ae10b3aSChris Mason struct page *d; 3044ae10b3aSChris Mason 3054ae10b3aSChris Mason if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags)) 3064ae10b3aSChris Mason return; 3074ae10b3aSChris Mason 3084ae10b3aSChris Mason for (i = 0; i < dest->nr_pages; i++) { 3094ae10b3aSChris Mason s = src->stripe_pages[i]; 3104ae10b3aSChris Mason if (!s || !PageUptodate(s)) { 3114ae10b3aSChris Mason continue; 3124ae10b3aSChris Mason } 3134ae10b3aSChris Mason 3144ae10b3aSChris Mason d = dest->stripe_pages[i]; 3154ae10b3aSChris Mason if (d) 3164ae10b3aSChris Mason __free_page(d); 3174ae10b3aSChris Mason 3184ae10b3aSChris Mason dest->stripe_pages[i] = s; 3194ae10b3aSChris Mason src->stripe_pages[i] = NULL; 3204ae10b3aSChris Mason } 3214ae10b3aSChris Mason } 3224ae10b3aSChris Mason 3234ae10b3aSChris Mason /* 32453b381b3SDavid Woodhouse * merging means we take the bio_list from the victim and 32553b381b3SDavid Woodhouse * splice it into the destination. The victim should 32653b381b3SDavid Woodhouse * be discarded afterwards. 32753b381b3SDavid Woodhouse * 32853b381b3SDavid Woodhouse * must be called with dest->bio_list_lock held 32953b381b3SDavid Woodhouse */ 33053b381b3SDavid Woodhouse static void merge_rbio(struct btrfs_raid_bio *dest, 33153b381b3SDavid Woodhouse struct btrfs_raid_bio *victim) 33253b381b3SDavid Woodhouse { 33353b381b3SDavid Woodhouse bio_list_merge(&dest->bio_list, &victim->bio_list); 33453b381b3SDavid Woodhouse dest->bio_list_bytes += victim->bio_list_bytes; 3354245215dSMiao Xie dest->generic_bio_cnt += victim->generic_bio_cnt; 33653b381b3SDavid Woodhouse bio_list_init(&victim->bio_list); 33753b381b3SDavid Woodhouse } 33853b381b3SDavid Woodhouse 33953b381b3SDavid Woodhouse /* 3404ae10b3aSChris Mason * used to prune items that are in the cache. The caller 3414ae10b3aSChris Mason * must hold the hash table lock. 3424ae10b3aSChris Mason */ 3434ae10b3aSChris Mason static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio) 3444ae10b3aSChris Mason { 3454ae10b3aSChris Mason int bucket = rbio_bucket(rbio); 3464ae10b3aSChris Mason struct btrfs_stripe_hash_table *table; 3474ae10b3aSChris Mason struct btrfs_stripe_hash *h; 3484ae10b3aSChris Mason int freeit = 0; 3494ae10b3aSChris Mason 3504ae10b3aSChris Mason /* 3514ae10b3aSChris Mason * check the bit again under the hash table lock.
3524ae10b3aSChris Mason */ 3534ae10b3aSChris Mason if (!test_bit(RBIO_CACHE_BIT, &rbio->flags)) 3544ae10b3aSChris Mason return; 3554ae10b3aSChris Mason 3564ae10b3aSChris Mason table = rbio->fs_info->stripe_hash_table; 3574ae10b3aSChris Mason h = table->table + bucket; 3584ae10b3aSChris Mason 3594ae10b3aSChris Mason /* hold the lock for the bucket because we may be 3604ae10b3aSChris Mason * removing it from the hash table 3614ae10b3aSChris Mason */ 3624ae10b3aSChris Mason spin_lock(&h->lock); 3634ae10b3aSChris Mason 3644ae10b3aSChris Mason /* 3654ae10b3aSChris Mason * hold the lock for the bio list because we need 3664ae10b3aSChris Mason * to make sure the bio list is empty 3674ae10b3aSChris Mason */ 3684ae10b3aSChris Mason spin_lock(&rbio->bio_list_lock); 3694ae10b3aSChris Mason 3704ae10b3aSChris Mason if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) { 3714ae10b3aSChris Mason list_del_init(&rbio->stripe_cache); 3724ae10b3aSChris Mason table->cache_size -= 1; 3734ae10b3aSChris Mason freeit = 1; 3744ae10b3aSChris Mason 3754ae10b3aSChris Mason /* if the bio list isn't empty, this rbio is 3764ae10b3aSChris Mason * still involved in an IO. We take it out 3774ae10b3aSChris Mason * of the cache list, and drop the ref that 3784ae10b3aSChris Mason * was held for the list. 3794ae10b3aSChris Mason * 3804ae10b3aSChris Mason * If the bio_list was empty, we also remove 3814ae10b3aSChris Mason * the rbio from the hash_table, and drop 3824ae10b3aSChris Mason * the corresponding ref 3834ae10b3aSChris Mason */ 3844ae10b3aSChris Mason if (bio_list_empty(&rbio->bio_list)) { 3854ae10b3aSChris Mason if (!list_empty(&rbio->hash_list)) { 3864ae10b3aSChris Mason list_del_init(&rbio->hash_list); 387dec95574SElena Reshetova refcount_dec(&rbio->refs); 3884ae10b3aSChris Mason BUG_ON(!list_empty(&rbio->plug_list)); 3894ae10b3aSChris Mason } 3904ae10b3aSChris Mason } 3914ae10b3aSChris Mason } 3924ae10b3aSChris Mason 3934ae10b3aSChris Mason spin_unlock(&rbio->bio_list_lock); 3944ae10b3aSChris Mason spin_unlock(&h->lock); 3954ae10b3aSChris Mason 3964ae10b3aSChris Mason if (freeit) 3974ae10b3aSChris Mason __free_raid_bio(rbio); 3984ae10b3aSChris Mason } 3994ae10b3aSChris Mason 4004ae10b3aSChris Mason /* 4014ae10b3aSChris Mason * prune a given rbio from the cache 4024ae10b3aSChris Mason */ 4034ae10b3aSChris Mason static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio) 4044ae10b3aSChris Mason { 4054ae10b3aSChris Mason struct btrfs_stripe_hash_table *table; 4064ae10b3aSChris Mason unsigned long flags; 4074ae10b3aSChris Mason 4084ae10b3aSChris Mason if (!test_bit(RBIO_CACHE_BIT, &rbio->flags)) 4094ae10b3aSChris Mason return; 4104ae10b3aSChris Mason 4114ae10b3aSChris Mason table = rbio->fs_info->stripe_hash_table; 4124ae10b3aSChris Mason 4134ae10b3aSChris Mason spin_lock_irqsave(&table->cache_lock, flags); 4144ae10b3aSChris Mason __remove_rbio_from_cache(rbio); 4154ae10b3aSChris Mason spin_unlock_irqrestore(&table->cache_lock, flags); 4164ae10b3aSChris Mason } 4174ae10b3aSChris Mason 4184ae10b3aSChris Mason /* 4194ae10b3aSChris Mason * remove everything in the cache 4204ae10b3aSChris Mason */ 42148a3b636SEric Sandeen static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info) 4224ae10b3aSChris Mason { 4234ae10b3aSChris Mason struct btrfs_stripe_hash_table *table; 4244ae10b3aSChris Mason unsigned long flags; 4254ae10b3aSChris Mason struct btrfs_raid_bio *rbio; 4264ae10b3aSChris Mason 4274ae10b3aSChris Mason table = info->stripe_hash_table; 4284ae10b3aSChris Mason 4294ae10b3aSChris Mason 
spin_lock_irqsave(&table->cache_lock, flags); 4304ae10b3aSChris Mason while (!list_empty(&table->stripe_cache)) { 4314ae10b3aSChris Mason rbio = list_entry(table->stripe_cache.next, 4324ae10b3aSChris Mason struct btrfs_raid_bio, 4334ae10b3aSChris Mason stripe_cache); 4344ae10b3aSChris Mason __remove_rbio_from_cache(rbio); 4354ae10b3aSChris Mason } 4364ae10b3aSChris Mason spin_unlock_irqrestore(&table->cache_lock, flags); 4374ae10b3aSChris Mason } 4384ae10b3aSChris Mason 4394ae10b3aSChris Mason /* 4404ae10b3aSChris Mason * remove all cached entries and free the hash table 4414ae10b3aSChris Mason * used by unmount 44253b381b3SDavid Woodhouse */ 44353b381b3SDavid Woodhouse void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info) 44453b381b3SDavid Woodhouse { 44553b381b3SDavid Woodhouse if (!info->stripe_hash_table) 44653b381b3SDavid Woodhouse return; 4474ae10b3aSChris Mason btrfs_clear_rbio_cache(info); 448f749303bSWang Shilong kvfree(info->stripe_hash_table); 44953b381b3SDavid Woodhouse info->stripe_hash_table = NULL; 45053b381b3SDavid Woodhouse } 45153b381b3SDavid Woodhouse 45253b381b3SDavid Woodhouse /* 4534ae10b3aSChris Mason * insert an rbio into the stripe cache. It 4544ae10b3aSChris Mason * must have already been prepared by calling 4554ae10b3aSChris Mason * cache_rbio_pages 4564ae10b3aSChris Mason * 4574ae10b3aSChris Mason * If this rbio was already cached, it gets 4584ae10b3aSChris Mason * moved to the front of the lru. 4594ae10b3aSChris Mason * 4604ae10b3aSChris Mason * If the size of the rbio cache is too big, we 4614ae10b3aSChris Mason * prune an item. 4624ae10b3aSChris Mason */ 4634ae10b3aSChris Mason static void cache_rbio(struct btrfs_raid_bio *rbio) 4644ae10b3aSChris Mason { 4654ae10b3aSChris Mason struct btrfs_stripe_hash_table *table; 4664ae10b3aSChris Mason unsigned long flags; 4674ae10b3aSChris Mason 4684ae10b3aSChris Mason if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags)) 4694ae10b3aSChris Mason return; 4704ae10b3aSChris Mason 4714ae10b3aSChris Mason table = rbio->fs_info->stripe_hash_table; 4724ae10b3aSChris Mason 4734ae10b3aSChris Mason spin_lock_irqsave(&table->cache_lock, flags); 4744ae10b3aSChris Mason spin_lock(&rbio->bio_list_lock); 4754ae10b3aSChris Mason 4764ae10b3aSChris Mason /* bump our ref if we were not in the list before */ 4774ae10b3aSChris Mason if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags)) 478dec95574SElena Reshetova refcount_inc(&rbio->refs); 4794ae10b3aSChris Mason 4804ae10b3aSChris Mason if (!list_empty(&rbio->stripe_cache)){ 4814ae10b3aSChris Mason list_move(&rbio->stripe_cache, &table->stripe_cache); 4824ae10b3aSChris Mason } else { 4834ae10b3aSChris Mason list_add(&rbio->stripe_cache, &table->stripe_cache); 4844ae10b3aSChris Mason table->cache_size += 1; 4854ae10b3aSChris Mason } 4864ae10b3aSChris Mason 4874ae10b3aSChris Mason spin_unlock(&rbio->bio_list_lock); 4884ae10b3aSChris Mason 4894ae10b3aSChris Mason if (table->cache_size > RBIO_CACHE_SIZE) { 4904ae10b3aSChris Mason struct btrfs_raid_bio *found; 4914ae10b3aSChris Mason 4924ae10b3aSChris Mason found = list_entry(table->stripe_cache.prev, 4934ae10b3aSChris Mason struct btrfs_raid_bio, 4944ae10b3aSChris Mason stripe_cache); 4954ae10b3aSChris Mason 4964ae10b3aSChris Mason if (found != rbio) 4974ae10b3aSChris Mason __remove_rbio_from_cache(found); 4984ae10b3aSChris Mason } 4994ae10b3aSChris Mason 5004ae10b3aSChris Mason spin_unlock_irqrestore(&table->cache_lock, flags); 5014ae10b3aSChris Mason } 5024ae10b3aSChris Mason 5034ae10b3aSChris Mason /* 50453b381b3SDavid Woodhouse * helper 
function to run the xor_blocks api. It is only 50553b381b3SDavid Woodhouse * able to do MAX_XOR_BLOCKS at a time, so we need to 50653b381b3SDavid Woodhouse * loop through. 50753b381b3SDavid Woodhouse */ 50853b381b3SDavid Woodhouse static void run_xor(void **pages, int src_cnt, ssize_t len) 50953b381b3SDavid Woodhouse { 51053b381b3SDavid Woodhouse int src_off = 0; 51153b381b3SDavid Woodhouse int xor_src_cnt = 0; 51253b381b3SDavid Woodhouse void *dest = pages[src_cnt]; 51353b381b3SDavid Woodhouse 51453b381b3SDavid Woodhouse while(src_cnt > 0) { 51553b381b3SDavid Woodhouse xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); 51653b381b3SDavid Woodhouse xor_blocks(xor_src_cnt, len, dest, pages + src_off); 51753b381b3SDavid Woodhouse 51853b381b3SDavid Woodhouse src_cnt -= xor_src_cnt; 51953b381b3SDavid Woodhouse src_off += xor_src_cnt; 52053b381b3SDavid Woodhouse } 52153b381b3SDavid Woodhouse } 52253b381b3SDavid Woodhouse 52353b381b3SDavid Woodhouse /* 524176571a1SDavid Sterba * Returns true if the bio list inside this rbio covers an entire stripe (no 525176571a1SDavid Sterba * rmw required). 52653b381b3SDavid Woodhouse */ 52753b381b3SDavid Woodhouse static int rbio_is_full(struct btrfs_raid_bio *rbio) 52853b381b3SDavid Woodhouse { 52953b381b3SDavid Woodhouse unsigned long flags; 530176571a1SDavid Sterba unsigned long size = rbio->bio_list_bytes; 531176571a1SDavid Sterba int ret = 1; 53253b381b3SDavid Woodhouse 53353b381b3SDavid Woodhouse spin_lock_irqsave(&rbio->bio_list_lock, flags); 534176571a1SDavid Sterba if (size != rbio->nr_data * rbio->stripe_len) 535176571a1SDavid Sterba ret = 0; 536176571a1SDavid Sterba BUG_ON(size > rbio->nr_data * rbio->stripe_len); 53753b381b3SDavid Woodhouse spin_unlock_irqrestore(&rbio->bio_list_lock, flags); 538176571a1SDavid Sterba 53953b381b3SDavid Woodhouse return ret; 54053b381b3SDavid Woodhouse } 54153b381b3SDavid Woodhouse 54253b381b3SDavid Woodhouse /* 54353b381b3SDavid Woodhouse * returns 1 if it is safe to merge two rbios together. 54453b381b3SDavid Woodhouse * The merging is safe if the two rbios correspond to 54553b381b3SDavid Woodhouse * the same stripe and if they are both going in the same 54653b381b3SDavid Woodhouse * direction (read vs write), and if neither one is 54753b381b3SDavid Woodhouse * locked for final IO 54853b381b3SDavid Woodhouse * 54953b381b3SDavid Woodhouse * The caller is responsible for locking such that 55053b381b3SDavid Woodhouse * rmw_locked is safe to test 55153b381b3SDavid Woodhouse */ 55253b381b3SDavid Woodhouse static int rbio_can_merge(struct btrfs_raid_bio *last, 55353b381b3SDavid Woodhouse struct btrfs_raid_bio *cur) 55453b381b3SDavid Woodhouse { 55553b381b3SDavid Woodhouse if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) || 55653b381b3SDavid Woodhouse test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) 55753b381b3SDavid Woodhouse return 0; 55853b381b3SDavid Woodhouse 5594ae10b3aSChris Mason /* 5604ae10b3aSChris Mason * we can't merge with cached rbios, since the 5614ae10b3aSChris Mason * idea is that when we merge the destination 5624ae10b3aSChris Mason * rbio is going to run our IO for us. We can 56301327610SNicholas D Steeves * steal from cached rbios though, other functions 5644ae10b3aSChris Mason * handle that. 
5654ae10b3aSChris Mason */ 5664ae10b3aSChris Mason if (test_bit(RBIO_CACHE_BIT, &last->flags) || 5674ae10b3aSChris Mason test_bit(RBIO_CACHE_BIT, &cur->flags)) 5684ae10b3aSChris Mason return 0; 5694ae10b3aSChris Mason 5708e5cfb55SZhao Lei if (last->bbio->raid_map[0] != 5718e5cfb55SZhao Lei cur->bbio->raid_map[0]) 57253b381b3SDavid Woodhouse return 0; 57353b381b3SDavid Woodhouse 5745a6ac9eaSMiao Xie /* we can't merge with different operations */ 5755a6ac9eaSMiao Xie if (last->operation != cur->operation) 57653b381b3SDavid Woodhouse return 0; 5775a6ac9eaSMiao Xie /* 5785a6ac9eaSMiao Xie * We need to read the full stripe from the drive, then check and 5795a6ac9eaSMiao Xie * repair the parity and write the new results. 5805a6ac9eaSMiao Xie * 5815a6ac9eaSMiao Xie * We're not allowed to add any new bios to the 5825a6ac9eaSMiao Xie * bio list here, anyone else that wants to 5835a6ac9eaSMiao Xie * change this stripe needs to do their own rmw. 5845a6ac9eaSMiao Xie */ 585db34be19SLiu Bo if (last->operation == BTRFS_RBIO_PARITY_SCRUB) 5865a6ac9eaSMiao Xie return 0; 58753b381b3SDavid Woodhouse 588db34be19SLiu Bo if (last->operation == BTRFS_RBIO_REBUILD_MISSING) 589b4ee1782SOmar Sandoval return 0; 590b4ee1782SOmar Sandoval 591cc54ff62SLiu Bo if (last->operation == BTRFS_RBIO_READ_REBUILD) { 592cc54ff62SLiu Bo int fa = last->faila; 593cc54ff62SLiu Bo int fb = last->failb; 594cc54ff62SLiu Bo int cur_fa = cur->faila; 595cc54ff62SLiu Bo int cur_fb = cur->failb; 596cc54ff62SLiu Bo 597cc54ff62SLiu Bo if (last->faila >= last->failb) { 598cc54ff62SLiu Bo fa = last->failb; 599cc54ff62SLiu Bo fb = last->faila; 600cc54ff62SLiu Bo } 601cc54ff62SLiu Bo 602cc54ff62SLiu Bo if (cur->faila >= cur->failb) { 603cc54ff62SLiu Bo cur_fa = cur->failb; 604cc54ff62SLiu Bo cur_fb = cur->faila; 605cc54ff62SLiu Bo } 606cc54ff62SLiu Bo 607cc54ff62SLiu Bo if (fa != cur_fa || fb != cur_fb) 608cc54ff62SLiu Bo return 0; 609cc54ff62SLiu Bo } 61053b381b3SDavid Woodhouse return 1; 61153b381b3SDavid Woodhouse } 61253b381b3SDavid Woodhouse 613b7178a5fSZhao Lei static int rbio_stripe_page_index(struct btrfs_raid_bio *rbio, int stripe, 614b7178a5fSZhao Lei int index) 615b7178a5fSZhao Lei { 616b7178a5fSZhao Lei return stripe * rbio->stripe_npages + index; 617b7178a5fSZhao Lei } 618b7178a5fSZhao Lei 619b7178a5fSZhao Lei /* 620b7178a5fSZhao Lei * these are just the pages from the rbio array, not from anything 621b7178a5fSZhao Lei * the FS sent down to us 622b7178a5fSZhao Lei */ 623b7178a5fSZhao Lei static struct page *rbio_stripe_page(struct btrfs_raid_bio *rbio, int stripe, 624b7178a5fSZhao Lei int index) 625b7178a5fSZhao Lei { 626b7178a5fSZhao Lei return rbio->stripe_pages[rbio_stripe_page_index(rbio, stripe, index)]; 627b7178a5fSZhao Lei } 628b7178a5fSZhao Lei 62953b381b3SDavid Woodhouse /* 63053b381b3SDavid Woodhouse * helper to index into the pstripe 63153b381b3SDavid Woodhouse */ 63253b381b3SDavid Woodhouse static struct page *rbio_pstripe_page(struct btrfs_raid_bio *rbio, int index) 63353b381b3SDavid Woodhouse { 634b7178a5fSZhao Lei return rbio_stripe_page(rbio, rbio->nr_data, index); 63553b381b3SDavid Woodhouse } 63653b381b3SDavid Woodhouse 63753b381b3SDavid Woodhouse /* 63853b381b3SDavid Woodhouse * helper to index into the qstripe, returns null 63953b381b3SDavid Woodhouse * if there is no qstripe 64053b381b3SDavid Woodhouse */ 64153b381b3SDavid Woodhouse static struct page *rbio_qstripe_page(struct btrfs_raid_bio *rbio, int index) 64253b381b3SDavid Woodhouse { 6432c8cdd6eSMiao Xie if (rbio->nr_data + 1 == rbio->real_stripes)
64453b381b3SDavid Woodhouse return NULL; 645b7178a5fSZhao Lei return rbio_stripe_page(rbio, rbio->nr_data + 1, index); 64653b381b3SDavid Woodhouse } 64753b381b3SDavid Woodhouse 64853b381b3SDavid Woodhouse /* 64953b381b3SDavid Woodhouse * The first stripe in the table for a logical address 65053b381b3SDavid Woodhouse * has the lock. rbios are added in one of three ways: 65153b381b3SDavid Woodhouse * 65253b381b3SDavid Woodhouse * 1) Nobody has the stripe locked yet. The rbio is given 65353b381b3SDavid Woodhouse * the lock and 0 is returned. The caller must start the IO 65453b381b3SDavid Woodhouse * themselves. 65553b381b3SDavid Woodhouse * 65653b381b3SDavid Woodhouse * 2) Someone has the stripe locked, but we're able to merge 65753b381b3SDavid Woodhouse * with the lock owner. The rbio is freed and the IO will 65853b381b3SDavid Woodhouse * start automatically along with the existing rbio. 1 is returned. 65953b381b3SDavid Woodhouse * 66053b381b3SDavid Woodhouse * 3) Someone has the stripe locked, but we're not able to merge. 66153b381b3SDavid Woodhouse * The rbio is added to the lock owner's plug list, or merged into 66253b381b3SDavid Woodhouse * an rbio already on the plug list. When the lock owner unlocks, 66353b381b3SDavid Woodhouse * the next rbio on the list is run and the IO is started automatically. 66453b381b3SDavid Woodhouse * 1 is returned 66553b381b3SDavid Woodhouse * 66653b381b3SDavid Woodhouse * If we return 0, the caller still owns the rbio and must continue with 66753b381b3SDavid Woodhouse * IO submission. If we return 1, the caller must assume the rbio has 66853b381b3SDavid Woodhouse * already been freed. 66953b381b3SDavid Woodhouse */ 67053b381b3SDavid Woodhouse static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio) 67153b381b3SDavid Woodhouse { 672721860d5SJohannes Thumshirn struct btrfs_stripe_hash *h; 67353b381b3SDavid Woodhouse struct btrfs_raid_bio *cur; 67453b381b3SDavid Woodhouse struct btrfs_raid_bio *pending; 67553b381b3SDavid Woodhouse unsigned long flags; 67653b381b3SDavid Woodhouse struct btrfs_raid_bio *freeit = NULL; 6774ae10b3aSChris Mason struct btrfs_raid_bio *cache_drop = NULL; 67853b381b3SDavid Woodhouse int ret = 0; 67953b381b3SDavid Woodhouse 680721860d5SJohannes Thumshirn h = rbio->fs_info->stripe_hash_table->table + rbio_bucket(rbio); 681721860d5SJohannes Thumshirn 68253b381b3SDavid Woodhouse spin_lock_irqsave(&h->lock, flags); 68353b381b3SDavid Woodhouse list_for_each_entry(cur, &h->hash_list, hash_list) { 6849d6cb1b0SJohannes Thumshirn if (cur->bbio->raid_map[0] != rbio->bbio->raid_map[0]) 6859d6cb1b0SJohannes Thumshirn continue; 6869d6cb1b0SJohannes Thumshirn 68753b381b3SDavid Woodhouse spin_lock(&cur->bio_list_lock); 68853b381b3SDavid Woodhouse 6899d6cb1b0SJohannes Thumshirn /* Can we steal this cached rbio's pages? */ 6904ae10b3aSChris Mason if (bio_list_empty(&cur->bio_list) && 6914ae10b3aSChris Mason list_empty(&cur->plug_list) && 6924ae10b3aSChris Mason test_bit(RBIO_CACHE_BIT, &cur->flags) && 6934ae10b3aSChris Mason !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) { 6944ae10b3aSChris Mason list_del_init(&cur->hash_list); 695dec95574SElena Reshetova refcount_dec(&cur->refs); 6964ae10b3aSChris Mason 6974ae10b3aSChris Mason steal_rbio(cur, rbio); 6984ae10b3aSChris Mason cache_drop = cur; 6994ae10b3aSChris Mason spin_unlock(&cur->bio_list_lock); 7004ae10b3aSChris Mason 7014ae10b3aSChris Mason goto lockit; 7024ae10b3aSChris Mason } 7034ae10b3aSChris Mason 7049d6cb1b0SJohannes Thumshirn /* Can we merge into the lock owner? 
*/ 70553b381b3SDavid Woodhouse if (rbio_can_merge(cur, rbio)) { 70653b381b3SDavid Woodhouse merge_rbio(cur, rbio); 70753b381b3SDavid Woodhouse spin_unlock(&cur->bio_list_lock); 70853b381b3SDavid Woodhouse freeit = rbio; 70953b381b3SDavid Woodhouse ret = 1; 71053b381b3SDavid Woodhouse goto out; 71153b381b3SDavid Woodhouse } 71253b381b3SDavid Woodhouse 7134ae10b3aSChris Mason 71453b381b3SDavid Woodhouse /* 7159d6cb1b0SJohannes Thumshirn * We couldn't merge with the running rbio, see if we can merge 7169d6cb1b0SJohannes Thumshirn * with the pending ones. We don't have to check for rmw_locked 7179d6cb1b0SJohannes Thumshirn * because there is no way they are inside finish_rmw right now 71853b381b3SDavid Woodhouse */ 7199d6cb1b0SJohannes Thumshirn list_for_each_entry(pending, &cur->plug_list, plug_list) { 72053b381b3SDavid Woodhouse if (rbio_can_merge(pending, rbio)) { 72153b381b3SDavid Woodhouse merge_rbio(pending, rbio); 72253b381b3SDavid Woodhouse spin_unlock(&cur->bio_list_lock); 72353b381b3SDavid Woodhouse freeit = rbio; 72453b381b3SDavid Woodhouse ret = 1; 72553b381b3SDavid Woodhouse goto out; 72653b381b3SDavid Woodhouse } 72753b381b3SDavid Woodhouse } 72853b381b3SDavid Woodhouse 7299d6cb1b0SJohannes Thumshirn /* 7309d6cb1b0SJohannes Thumshirn * No merging, put us on the tail of the plug list, our rbio 7319d6cb1b0SJohannes Thumshirn * will be started when the currently running rbio unlocks 73253b381b3SDavid Woodhouse */ 73353b381b3SDavid Woodhouse list_add_tail(&rbio->plug_list, &cur->plug_list); 73453b381b3SDavid Woodhouse spin_unlock(&cur->bio_list_lock); 73553b381b3SDavid Woodhouse ret = 1; 73653b381b3SDavid Woodhouse goto out; 73753b381b3SDavid Woodhouse } 7384ae10b3aSChris Mason lockit: 739dec95574SElena Reshetova refcount_inc(&rbio->refs); 74053b381b3SDavid Woodhouse list_add(&rbio->hash_list, &h->hash_list); 74153b381b3SDavid Woodhouse out: 74253b381b3SDavid Woodhouse spin_unlock_irqrestore(&h->lock, flags); 7434ae10b3aSChris Mason if (cache_drop) 7444ae10b3aSChris Mason remove_rbio_from_cache(cache_drop); 74553b381b3SDavid Woodhouse if (freeit) 74653b381b3SDavid Woodhouse __free_raid_bio(freeit); 74753b381b3SDavid Woodhouse return ret; 74853b381b3SDavid Woodhouse } 74953b381b3SDavid Woodhouse 75053b381b3SDavid Woodhouse /* 75153b381b3SDavid Woodhouse * called as rmw or parity rebuild is completed.
If the plug list has more 75253b381b3SDavid Woodhouse * rbios waiting for this stripe, the next one on the list will be started 75353b381b3SDavid Woodhouse */ 75453b381b3SDavid Woodhouse static noinline void unlock_stripe(struct btrfs_raid_bio *rbio) 75553b381b3SDavid Woodhouse { 75653b381b3SDavid Woodhouse int bucket; 75753b381b3SDavid Woodhouse struct btrfs_stripe_hash *h; 75853b381b3SDavid Woodhouse unsigned long flags; 7594ae10b3aSChris Mason int keep_cache = 0; 76053b381b3SDavid Woodhouse 76153b381b3SDavid Woodhouse bucket = rbio_bucket(rbio); 76253b381b3SDavid Woodhouse h = rbio->fs_info->stripe_hash_table->table + bucket; 76353b381b3SDavid Woodhouse 7644ae10b3aSChris Mason if (list_empty(&rbio->plug_list)) 7654ae10b3aSChris Mason cache_rbio(rbio); 7664ae10b3aSChris Mason 76753b381b3SDavid Woodhouse spin_lock_irqsave(&h->lock, flags); 76853b381b3SDavid Woodhouse spin_lock(&rbio->bio_list_lock); 76953b381b3SDavid Woodhouse 77053b381b3SDavid Woodhouse if (!list_empty(&rbio->hash_list)) { 7714ae10b3aSChris Mason /* 7724ae10b3aSChris Mason * if we're still cached and there is no other IO 7734ae10b3aSChris Mason * to perform, just leave this rbio here for others 7744ae10b3aSChris Mason * to steal from later 7754ae10b3aSChris Mason */ 7764ae10b3aSChris Mason if (list_empty(&rbio->plug_list) && 7774ae10b3aSChris Mason test_bit(RBIO_CACHE_BIT, &rbio->flags)) { 7784ae10b3aSChris Mason keep_cache = 1; 7794ae10b3aSChris Mason clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 7804ae10b3aSChris Mason BUG_ON(!bio_list_empty(&rbio->bio_list)); 7814ae10b3aSChris Mason goto done; 7824ae10b3aSChris Mason } 78353b381b3SDavid Woodhouse 78453b381b3SDavid Woodhouse list_del_init(&rbio->hash_list); 785dec95574SElena Reshetova refcount_dec(&rbio->refs); 78653b381b3SDavid Woodhouse 78753b381b3SDavid Woodhouse /* 78853b381b3SDavid Woodhouse * we use the plug list to hold all the rbios 78953b381b3SDavid Woodhouse * waiting for the chance to lock this stripe. 79053b381b3SDavid Woodhouse * hand the lock over to one of them. 
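 * The code below moves the next rbio from our plug_list onto the
 * hash list (so it now owns the stripe lock) and then queues it via
 * start_async_work() according to its ->operation.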
79153b381b3SDavid Woodhouse */ 79253b381b3SDavid Woodhouse if (!list_empty(&rbio->plug_list)) { 79353b381b3SDavid Woodhouse struct btrfs_raid_bio *next; 79453b381b3SDavid Woodhouse struct list_head *head = rbio->plug_list.next; 79553b381b3SDavid Woodhouse 79653b381b3SDavid Woodhouse next = list_entry(head, struct btrfs_raid_bio, 79753b381b3SDavid Woodhouse plug_list); 79853b381b3SDavid Woodhouse 79953b381b3SDavid Woodhouse list_del_init(&rbio->plug_list); 80053b381b3SDavid Woodhouse 80153b381b3SDavid Woodhouse list_add(&next->hash_list, &h->hash_list); 802dec95574SElena Reshetova refcount_inc(&next->refs); 80353b381b3SDavid Woodhouse spin_unlock(&rbio->bio_list_lock); 80453b381b3SDavid Woodhouse spin_unlock_irqrestore(&h->lock, flags); 80553b381b3SDavid Woodhouse 8061b94b556SMiao Xie if (next->operation == BTRFS_RBIO_READ_REBUILD) 807e66d8d5aSDavid Sterba start_async_work(next, read_rebuild_work); 808b4ee1782SOmar Sandoval else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) { 809b4ee1782SOmar Sandoval steal_rbio(rbio, next); 810e66d8d5aSDavid Sterba start_async_work(next, read_rebuild_work); 811b4ee1782SOmar Sandoval } else if (next->operation == BTRFS_RBIO_WRITE) { 8124ae10b3aSChris Mason steal_rbio(rbio, next); 813cf6a4a75SDavid Sterba start_async_work(next, rmw_work); 8145a6ac9eaSMiao Xie } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) { 8155a6ac9eaSMiao Xie steal_rbio(rbio, next); 816a81b747dSDavid Sterba start_async_work(next, scrub_parity_work); 8174ae10b3aSChris Mason } 81853b381b3SDavid Woodhouse 81953b381b3SDavid Woodhouse goto done_nolock; 82053b381b3SDavid Woodhouse } 82153b381b3SDavid Woodhouse } 8224ae10b3aSChris Mason done: 82353b381b3SDavid Woodhouse spin_unlock(&rbio->bio_list_lock); 82453b381b3SDavid Woodhouse spin_unlock_irqrestore(&h->lock, flags); 82553b381b3SDavid Woodhouse 82653b381b3SDavid Woodhouse done_nolock: 8274ae10b3aSChris Mason if (!keep_cache) 8284ae10b3aSChris Mason remove_rbio_from_cache(rbio); 82953b381b3SDavid Woodhouse } 83053b381b3SDavid Woodhouse 83153b381b3SDavid Woodhouse static void __free_raid_bio(struct btrfs_raid_bio *rbio) 83253b381b3SDavid Woodhouse { 83353b381b3SDavid Woodhouse int i; 83453b381b3SDavid Woodhouse 835dec95574SElena Reshetova if (!refcount_dec_and_test(&rbio->refs)) 83653b381b3SDavid Woodhouse return; 83753b381b3SDavid Woodhouse 8384ae10b3aSChris Mason WARN_ON(!list_empty(&rbio->stripe_cache)); 83953b381b3SDavid Woodhouse WARN_ON(!list_empty(&rbio->hash_list)); 84053b381b3SDavid Woodhouse WARN_ON(!bio_list_empty(&rbio->bio_list)); 84153b381b3SDavid Woodhouse 84253b381b3SDavid Woodhouse for (i = 0; i < rbio->nr_pages; i++) { 84353b381b3SDavid Woodhouse if (rbio->stripe_pages[i]) { 84453b381b3SDavid Woodhouse __free_page(rbio->stripe_pages[i]); 84553b381b3SDavid Woodhouse rbio->stripe_pages[i] = NULL; 84653b381b3SDavid Woodhouse } 84753b381b3SDavid Woodhouse } 848af8e2d1dSMiao Xie 8496e9606d2SZhao Lei btrfs_put_bbio(rbio->bbio); 85053b381b3SDavid Woodhouse kfree(rbio); 85153b381b3SDavid Woodhouse } 85253b381b3SDavid Woodhouse 8537583d8d0SLiu Bo static void rbio_endio_bio_list(struct bio *cur, blk_status_t err) 85453b381b3SDavid Woodhouse { 8557583d8d0SLiu Bo struct bio *next; 8567583d8d0SLiu Bo 8577583d8d0SLiu Bo while (cur) { 8587583d8d0SLiu Bo next = cur->bi_next; 8597583d8d0SLiu Bo cur->bi_next = NULL; 8607583d8d0SLiu Bo cur->bi_status = err; 8617583d8d0SLiu Bo bio_endio(cur); 8627583d8d0SLiu Bo cur = next; 8637583d8d0SLiu Bo } 86453b381b3SDavid Woodhouse } 86553b381b3SDavid Woodhouse 86653b381b3SDavid Woodhouse /* 
86753b381b3SDavid Woodhouse * this frees the rbio and runs through all the bios in the 86853b381b3SDavid Woodhouse * bio_list and calls end_io on them 86953b381b3SDavid Woodhouse */ 8704e4cbee9SChristoph Hellwig static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err) 87153b381b3SDavid Woodhouse { 87253b381b3SDavid Woodhouse struct bio *cur = bio_list_get(&rbio->bio_list); 8737583d8d0SLiu Bo struct bio *extra; 8744245215dSMiao Xie 8754245215dSMiao Xie if (rbio->generic_bio_cnt) 8764245215dSMiao Xie btrfs_bio_counter_sub(rbio->fs_info, rbio->generic_bio_cnt); 8774245215dSMiao Xie 8787583d8d0SLiu Bo /* 8797583d8d0SLiu Bo * At this moment, rbio->bio_list is empty, however since rbio does not 8807583d8d0SLiu Bo * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the 8817583d8d0SLiu Bo * hash list, rbio may be merged with others so that rbio->bio_list 8827583d8d0SLiu Bo * becomes non-empty. 8837583d8d0SLiu Bo * Once unlock_stripe() is done, rbio->bio_list will not be updated any 8847583d8d0SLiu Bo * more and we can call bio_endio() on all queued bios. 8857583d8d0SLiu Bo */ 8867583d8d0SLiu Bo unlock_stripe(rbio); 8877583d8d0SLiu Bo extra = bio_list_get(&rbio->bio_list); 8887583d8d0SLiu Bo __free_raid_bio(rbio); 88953b381b3SDavid Woodhouse 8907583d8d0SLiu Bo rbio_endio_bio_list(cur, err); 8917583d8d0SLiu Bo if (extra) 8927583d8d0SLiu Bo rbio_endio_bio_list(extra, err); 89353b381b3SDavid Woodhouse } 89453b381b3SDavid Woodhouse 89553b381b3SDavid Woodhouse /* 89653b381b3SDavid Woodhouse * end io function used by finish_rmw. When we finally 89753b381b3SDavid Woodhouse * get here, we've written a full stripe 89853b381b3SDavid Woodhouse */ 8994246a0b6SChristoph Hellwig static void raid_write_end_io(struct bio *bio) 90053b381b3SDavid Woodhouse { 90153b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio = bio->bi_private; 9024e4cbee9SChristoph Hellwig blk_status_t err = bio->bi_status; 903a6111d11SZhao Lei int max_errors; 90453b381b3SDavid Woodhouse 90553b381b3SDavid Woodhouse if (err) 90653b381b3SDavid Woodhouse fail_bio_stripe(rbio, bio); 90753b381b3SDavid Woodhouse 90853b381b3SDavid Woodhouse bio_put(bio); 90953b381b3SDavid Woodhouse 910b89e1b01SMiao Xie if (!atomic_dec_and_test(&rbio->stripes_pending)) 91153b381b3SDavid Woodhouse return; 91253b381b3SDavid Woodhouse 91358efbc9fSOmar Sandoval err = BLK_STS_OK; 91453b381b3SDavid Woodhouse 91553b381b3SDavid Woodhouse /* OK, we have read all the stripes we need to. */ 916a6111d11SZhao Lei max_errors = (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) ? 917a6111d11SZhao Lei 0 : rbio->bbio->max_errors; 918a6111d11SZhao Lei if (atomic_read(&rbio->error) > max_errors) 9194e4cbee9SChristoph Hellwig err = BLK_STS_IOERR; 92053b381b3SDavid Woodhouse 9214246a0b6SChristoph Hellwig rbio_orig_end_io(rbio, err); 92253b381b3SDavid Woodhouse } 92353b381b3SDavid Woodhouse 92453b381b3SDavid Woodhouse /* 92553b381b3SDavid Woodhouse * the read/modify/write code wants to use the original bio for 92653b381b3SDavid Woodhouse * any pages it included, and then use the rbio for everything 92753b381b3SDavid Woodhouse * else. This function decides if a given index (stripe number) 92853b381b3SDavid Woodhouse * and page number in that stripe fall inside the original bio 92953b381b3SDavid Woodhouse * or the rbio. 
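 * (Illustrative example, assuming 4KiB pages and a 64KiB stripe_len:
 * index 1, pagenr 3 maps to chunk_page 1 * 16 + 3 = 19, which is the
 * slot consulted in both bio_pages and stripe_pages.)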
93053b381b3SDavid Woodhouse * 93153b381b3SDavid Woodhouse * if you set bio_list_only, you'll get a NULL back for any ranges 93253b381b3SDavid Woodhouse * that are outside the bio_list 93353b381b3SDavid Woodhouse * 93453b381b3SDavid Woodhouse * This doesn't take any refs on anything, you get a bare page pointer 93553b381b3SDavid Woodhouse * and the caller must bump refs as required. 93653b381b3SDavid Woodhouse * 93753b381b3SDavid Woodhouse * You must call index_rbio_pages once before you can trust 93853b381b3SDavid Woodhouse * the answers from this function. 93953b381b3SDavid Woodhouse */ 94053b381b3SDavid Woodhouse static struct page *page_in_rbio(struct btrfs_raid_bio *rbio, 94153b381b3SDavid Woodhouse int index, int pagenr, int bio_list_only) 94253b381b3SDavid Woodhouse { 94353b381b3SDavid Woodhouse int chunk_page; 94453b381b3SDavid Woodhouse struct page *p = NULL; 94553b381b3SDavid Woodhouse 94653b381b3SDavid Woodhouse chunk_page = index * (rbio->stripe_len >> PAGE_SHIFT) + pagenr; 94753b381b3SDavid Woodhouse 94853b381b3SDavid Woodhouse spin_lock_irq(&rbio->bio_list_lock); 94953b381b3SDavid Woodhouse p = rbio->bio_pages[chunk_page]; 95053b381b3SDavid Woodhouse spin_unlock_irq(&rbio->bio_list_lock); 95153b381b3SDavid Woodhouse 95253b381b3SDavid Woodhouse if (p || bio_list_only) 95353b381b3SDavid Woodhouse return p; 95453b381b3SDavid Woodhouse 95553b381b3SDavid Woodhouse return rbio->stripe_pages[chunk_page]; 95653b381b3SDavid Woodhouse } 95753b381b3SDavid Woodhouse 95853b381b3SDavid Woodhouse /* 95953b381b3SDavid Woodhouse * number of pages we need for the entire stripe across all the 96053b381b3SDavid Woodhouse * drives 96153b381b3SDavid Woodhouse */ 96253b381b3SDavid Woodhouse static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes) 96353b381b3SDavid Woodhouse { 96409cbfeafSKirill A. Shutemov return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes; 96553b381b3SDavid Woodhouse } 96653b381b3SDavid Woodhouse 96753b381b3SDavid Woodhouse /* 96853b381b3SDavid Woodhouse * allocation and initial setup for the btrfs_raid_bio. Note 96953b381b3SDavid Woodhouse * this does not allocate any pages for rbio->pages.
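 *
 * The rbio and its trailing arrays come from a single allocation,
 * roughly laid out as:
 *
 *   [struct btrfs_raid_bio][stripe_pages][bio_pages][finish_pointers]
 *   [dbitmap][finish_pbitmap]
 *
 * CONSUME_ALLOC() below simply walks a cursor through that region to
 * set up each pointer.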
97053b381b3SDavid Woodhouse */ 9712ff7e61eSJeff Mahoney static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info, 9722ff7e61eSJeff Mahoney struct btrfs_bio *bbio, 9732ff7e61eSJeff Mahoney u64 stripe_len) 97453b381b3SDavid Woodhouse { 97553b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio; 97653b381b3SDavid Woodhouse int nr_data = 0; 9772c8cdd6eSMiao Xie int real_stripes = bbio->num_stripes - bbio->num_tgtdevs; 9782c8cdd6eSMiao Xie int num_pages = rbio_nr_pages(stripe_len, real_stripes); 9795a6ac9eaSMiao Xie int stripe_npages = DIV_ROUND_UP(stripe_len, PAGE_SIZE); 98053b381b3SDavid Woodhouse void *p; 98153b381b3SDavid Woodhouse 9821389053eSKees Cook rbio = kzalloc(sizeof(*rbio) + 9831389053eSKees Cook sizeof(*rbio->stripe_pages) * num_pages + 9841389053eSKees Cook sizeof(*rbio->bio_pages) * num_pages + 9851389053eSKees Cook sizeof(*rbio->finish_pointers) * real_stripes + 9861389053eSKees Cook sizeof(*rbio->dbitmap) * BITS_TO_LONGS(stripe_npages) + 9871389053eSKees Cook sizeof(*rbio->finish_pbitmap) * 9881389053eSKees Cook BITS_TO_LONGS(stripe_npages), 9891389053eSKees Cook GFP_NOFS); 990af8e2d1dSMiao Xie if (!rbio) 99153b381b3SDavid Woodhouse return ERR_PTR(-ENOMEM); 99253b381b3SDavid Woodhouse 99353b381b3SDavid Woodhouse bio_list_init(&rbio->bio_list); 99453b381b3SDavid Woodhouse INIT_LIST_HEAD(&rbio->plug_list); 99553b381b3SDavid Woodhouse spin_lock_init(&rbio->bio_list_lock); 9964ae10b3aSChris Mason INIT_LIST_HEAD(&rbio->stripe_cache); 99753b381b3SDavid Woodhouse INIT_LIST_HEAD(&rbio->hash_list); 99853b381b3SDavid Woodhouse rbio->bbio = bbio; 9992ff7e61eSJeff Mahoney rbio->fs_info = fs_info; 100053b381b3SDavid Woodhouse rbio->stripe_len = stripe_len; 100153b381b3SDavid Woodhouse rbio->nr_pages = num_pages; 10022c8cdd6eSMiao Xie rbio->real_stripes = real_stripes; 10035a6ac9eaSMiao Xie rbio->stripe_npages = stripe_npages; 100453b381b3SDavid Woodhouse rbio->faila = -1; 100553b381b3SDavid Woodhouse rbio->failb = -1; 1006dec95574SElena Reshetova refcount_set(&rbio->refs, 1); 1007b89e1b01SMiao Xie atomic_set(&rbio->error, 0); 1008b89e1b01SMiao Xie atomic_set(&rbio->stripes_pending, 0); 100953b381b3SDavid Woodhouse 101053b381b3SDavid Woodhouse /* 10111389053eSKees Cook * the stripe_pages, bio_pages, etc arrays point to the extra 101253b381b3SDavid Woodhouse * memory we allocated past the end of the rbio 101353b381b3SDavid Woodhouse */ 101453b381b3SDavid Woodhouse p = rbio + 1; 10151389053eSKees Cook #define CONSUME_ALLOC(ptr, count) do { \ 10161389053eSKees Cook ptr = p; \ 10171389053eSKees Cook p = (unsigned char *)p + sizeof(*(ptr)) * (count); \ 10181389053eSKees Cook } while (0) 10191389053eSKees Cook CONSUME_ALLOC(rbio->stripe_pages, num_pages); 10201389053eSKees Cook CONSUME_ALLOC(rbio->bio_pages, num_pages); 10211389053eSKees Cook CONSUME_ALLOC(rbio->finish_pointers, real_stripes); 10221389053eSKees Cook CONSUME_ALLOC(rbio->dbitmap, BITS_TO_LONGS(stripe_npages)); 10231389053eSKees Cook CONSUME_ALLOC(rbio->finish_pbitmap, BITS_TO_LONGS(stripe_npages)); 10241389053eSKees Cook #undef CONSUME_ALLOC 102553b381b3SDavid Woodhouse 102610f11900SZhao Lei if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID5) 102710f11900SZhao Lei nr_data = real_stripes - 1; 102810f11900SZhao Lei else if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) 10292c8cdd6eSMiao Xie nr_data = real_stripes - 2; 103053b381b3SDavid Woodhouse else 103110f11900SZhao Lei BUG(); 103253b381b3SDavid Woodhouse 103353b381b3SDavid Woodhouse rbio->nr_data = nr_data; 103453b381b3SDavid Woodhouse return rbio; 103553b381b3SDavid Woodhouse } 
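/*
 * Illustrative sketch only (not a function in this file): roughly how a
 * write-path caller builds on alloc_rbio(); the real callers add error
 * handling, plugging and accounting on top of this.
 *
 *	rbio = alloc_rbio(fs_info, bbio, stripe_len);
 *	if (IS_ERR(rbio))
 *		return PTR_ERR(rbio);
 *	bio_list_add(&rbio->bio_list, bio);
 *	rbio->bio_list_bytes = bio->bi_iter.bi_size;
 *	rbio->operation = BTRFS_RBIO_WRITE;
 *
 * The rbio is then either merged into an existing rbio for the same
 * stripe or locked via lock_stripe_add() and submitted.
 */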
103653b381b3SDavid Woodhouse 103753b381b3SDavid Woodhouse /* allocate pages for all the stripes in the bio, including parity */ 103853b381b3SDavid Woodhouse static int alloc_rbio_pages(struct btrfs_raid_bio *rbio) 103953b381b3SDavid Woodhouse { 104053b381b3SDavid Woodhouse int i; 104153b381b3SDavid Woodhouse struct page *page; 104253b381b3SDavid Woodhouse 104353b381b3SDavid Woodhouse for (i = 0; i < rbio->nr_pages; i++) { 104453b381b3SDavid Woodhouse if (rbio->stripe_pages[i]) 104553b381b3SDavid Woodhouse continue; 104653b381b3SDavid Woodhouse page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 104753b381b3SDavid Woodhouse if (!page) 104853b381b3SDavid Woodhouse return -ENOMEM; 104953b381b3SDavid Woodhouse rbio->stripe_pages[i] = page; 105053b381b3SDavid Woodhouse } 105153b381b3SDavid Woodhouse return 0; 105253b381b3SDavid Woodhouse } 105353b381b3SDavid Woodhouse 1054b7178a5fSZhao Lei /* only allocate pages for p/q stripes */ 105553b381b3SDavid Woodhouse static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio) 105653b381b3SDavid Woodhouse { 105753b381b3SDavid Woodhouse int i; 105853b381b3SDavid Woodhouse struct page *page; 105953b381b3SDavid Woodhouse 1060b7178a5fSZhao Lei i = rbio_stripe_page_index(rbio, rbio->nr_data, 0); 106153b381b3SDavid Woodhouse 106253b381b3SDavid Woodhouse for (; i < rbio->nr_pages; i++) { 106353b381b3SDavid Woodhouse if (rbio->stripe_pages[i]) 106453b381b3SDavid Woodhouse continue; 106553b381b3SDavid Woodhouse page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 106653b381b3SDavid Woodhouse if (!page) 106753b381b3SDavid Woodhouse return -ENOMEM; 106853b381b3SDavid Woodhouse rbio->stripe_pages[i] = page; 106953b381b3SDavid Woodhouse } 107053b381b3SDavid Woodhouse return 0; 107153b381b3SDavid Woodhouse } 107253b381b3SDavid Woodhouse 107353b381b3SDavid Woodhouse /* 107453b381b3SDavid Woodhouse * add a single page from a specific stripe into our list of bios for IO 107553b381b3SDavid Woodhouse * this will try to merge into existing bios if possible, and returns 107653b381b3SDavid Woodhouse * zero if all went well. 107753b381b3SDavid Woodhouse */ 107848a3b636SEric Sandeen static int rbio_add_io_page(struct btrfs_raid_bio *rbio, 107953b381b3SDavid Woodhouse struct bio_list *bio_list, 108053b381b3SDavid Woodhouse struct page *page, 108153b381b3SDavid Woodhouse int stripe_nr, 108253b381b3SDavid Woodhouse unsigned long page_index, 108353b381b3SDavid Woodhouse unsigned long bio_max_len) 108453b381b3SDavid Woodhouse { 108553b381b3SDavid Woodhouse struct bio *last = bio_list->tail; 108653b381b3SDavid Woodhouse int ret; 108753b381b3SDavid Woodhouse struct bio *bio; 108853b381b3SDavid Woodhouse struct btrfs_bio_stripe *stripe; 108953b381b3SDavid Woodhouse u64 disk_start; 109053b381b3SDavid Woodhouse 109153b381b3SDavid Woodhouse stripe = &rbio->bbio->stripes[stripe_nr]; 109209cbfeafSKirill A. 
Shutemov disk_start = stripe->physical + (page_index << PAGE_SHIFT); 109353b381b3SDavid Woodhouse 109453b381b3SDavid Woodhouse /* if the device is missing, just fail this stripe */ 109553b381b3SDavid Woodhouse if (!stripe->dev->bdev) 109653b381b3SDavid Woodhouse return fail_rbio_index(rbio, stripe_nr); 109753b381b3SDavid Woodhouse 109853b381b3SDavid Woodhouse /* see if we can add this page onto our existing bio */ 109953b381b3SDavid Woodhouse if (last) { 11001201b58bSDavid Sterba u64 last_end = last->bi_iter.bi_sector << 9; 11014f024f37SKent Overstreet last_end += last->bi_iter.bi_size; 110253b381b3SDavid Woodhouse 110353b381b3SDavid Woodhouse /* 110453b381b3SDavid Woodhouse * we can't merge these if they are from different 110553b381b3SDavid Woodhouse * devices or if they are not contiguous 110653b381b3SDavid Woodhouse */ 1107f90ae76aSNikolay Borisov if (last_end == disk_start && !last->bi_status && 1108*309dca30SChristoph Hellwig last->bi_bdev == stripe->dev->bdev) { 110909cbfeafSKirill A. Shutemov ret = bio_add_page(last, page, PAGE_SIZE, 0); 111009cbfeafSKirill A. Shutemov if (ret == PAGE_SIZE) 111153b381b3SDavid Woodhouse return 0; 111253b381b3SDavid Woodhouse } 111353b381b3SDavid Woodhouse } 111453b381b3SDavid Woodhouse 111553b381b3SDavid Woodhouse /* put a new bio on the list */ 1116c5e4c3d7SDavid Sterba bio = btrfs_io_bio_alloc(bio_max_len >> PAGE_SHIFT ?: 1); 1117c31efbdfSNikolay Borisov btrfs_io_bio(bio)->device = stripe->dev; 11184f024f37SKent Overstreet bio->bi_iter.bi_size = 0; 111974d46992SChristoph Hellwig bio_set_dev(bio, stripe->dev->bdev); 11204f024f37SKent Overstreet bio->bi_iter.bi_sector = disk_start >> 9; 112153b381b3SDavid Woodhouse 112209cbfeafSKirill A. Shutemov bio_add_page(bio, page, PAGE_SIZE, 0); 112353b381b3SDavid Woodhouse bio_list_add(bio_list, bio); 112453b381b3SDavid Woodhouse return 0; 112553b381b3SDavid Woodhouse } 112653b381b3SDavid Woodhouse 112753b381b3SDavid Woodhouse /* 112853b381b3SDavid Woodhouse * while we're doing the read/modify/write cycle, we could 112953b381b3SDavid Woodhouse * have errors in reading pages off the disk. This checks 113053b381b3SDavid Woodhouse * for errors and if we're not able to read the page it'll 113153b381b3SDavid Woodhouse * trigger parity reconstruction. The rmw will be finished 113253b381b3SDavid Woodhouse * after we've reconstructed the failed stripes 113353b381b3SDavid Woodhouse */ 113453b381b3SDavid Woodhouse static void validate_rbio_for_rmw(struct btrfs_raid_bio *rbio) 113553b381b3SDavid Woodhouse { 113653b381b3SDavid Woodhouse if (rbio->faila >= 0 || rbio->failb >= 0) { 11372c8cdd6eSMiao Xie BUG_ON(rbio->faila == rbio->real_stripes - 1); 113853b381b3SDavid Woodhouse __raid56_parity_recover(rbio); 113953b381b3SDavid Woodhouse } else { 114053b381b3SDavid Woodhouse finish_rmw(rbio); 114153b381b3SDavid Woodhouse } 114253b381b3SDavid Woodhouse } 114353b381b3SDavid Woodhouse 114453b381b3SDavid Woodhouse /* 114553b381b3SDavid Woodhouse * helper function to walk our bio list and populate the bio_pages array with 114653b381b3SDavid Woodhouse * the result. This seems expensive, but it is faster than constantly 114753b381b3SDavid Woodhouse * searching through the bio list as we setup the IO in finish_rmw or stripe 114853b381b3SDavid Woodhouse * reconstruction. 
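 * (Illustrative example, assuming 4KiB pages: a 16KiB bio that starts
 * 8KiB past raid_map[0] gets page_index 2 and populates bio_pages[2]
 * through bio_pages[5].)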
114953b381b3SDavid Woodhouse * 115053b381b3SDavid Woodhouse * This must be called before you trust the answers from page_in_rbio 115153b381b3SDavid Woodhouse */ 115253b381b3SDavid Woodhouse static void index_rbio_pages(struct btrfs_raid_bio *rbio) 115353b381b3SDavid Woodhouse { 115453b381b3SDavid Woodhouse struct bio *bio; 115553b381b3SDavid Woodhouse u64 start; 115653b381b3SDavid Woodhouse unsigned long stripe_offset; 115753b381b3SDavid Woodhouse unsigned long page_index; 115853b381b3SDavid Woodhouse 115953b381b3SDavid Woodhouse spin_lock_irq(&rbio->bio_list_lock); 116053b381b3SDavid Woodhouse bio_list_for_each(bio, &rbio->bio_list) { 11616592e58cSFilipe Manana struct bio_vec bvec; 11626592e58cSFilipe Manana struct bvec_iter iter; 11636592e58cSFilipe Manana int i = 0; 11646592e58cSFilipe Manana 11651201b58bSDavid Sterba start = bio->bi_iter.bi_sector << 9; 11668e5cfb55SZhao Lei stripe_offset = start - rbio->bbio->raid_map[0]; 116709cbfeafSKirill A. Shutemov page_index = stripe_offset >> PAGE_SHIFT; 116853b381b3SDavid Woodhouse 11696592e58cSFilipe Manana if (bio_flagged(bio, BIO_CLONED)) 11706592e58cSFilipe Manana bio->bi_iter = btrfs_io_bio(bio)->iter; 11716592e58cSFilipe Manana 11726592e58cSFilipe Manana bio_for_each_segment(bvec, bio, iter) { 11736592e58cSFilipe Manana rbio->bio_pages[page_index + i] = bvec.bv_page; 11746592e58cSFilipe Manana i++; 11756592e58cSFilipe Manana } 117653b381b3SDavid Woodhouse } 117753b381b3SDavid Woodhouse spin_unlock_irq(&rbio->bio_list_lock); 117853b381b3SDavid Woodhouse } 117953b381b3SDavid Woodhouse 118053b381b3SDavid Woodhouse /* 118153b381b3SDavid Woodhouse * this is called from one of two situations. We either 118253b381b3SDavid Woodhouse * have a full stripe from the higher layers, or we've read all 118353b381b3SDavid Woodhouse * the missing bits off disk. 118453b381b3SDavid Woodhouse * 118553b381b3SDavid Woodhouse * This will calculate the parity and then send down any 118653b381b3SDavid Woodhouse * changed blocks. 118753b381b3SDavid Woodhouse */ 118853b381b3SDavid Woodhouse static noinline void finish_rmw(struct btrfs_raid_bio *rbio) 118953b381b3SDavid Woodhouse { 119053b381b3SDavid Woodhouse struct btrfs_bio *bbio = rbio->bbio; 11911389053eSKees Cook void **pointers = rbio->finish_pointers; 119253b381b3SDavid Woodhouse int nr_data = rbio->nr_data; 119353b381b3SDavid Woodhouse int stripe; 119453b381b3SDavid Woodhouse int pagenr; 1195c17af965SDavid Sterba bool has_qstripe; 119653b381b3SDavid Woodhouse struct bio_list bio_list; 119753b381b3SDavid Woodhouse struct bio *bio; 119853b381b3SDavid Woodhouse int ret; 119953b381b3SDavid Woodhouse 120053b381b3SDavid Woodhouse bio_list_init(&bio_list); 120153b381b3SDavid Woodhouse 1202c17af965SDavid Sterba if (rbio->real_stripes - rbio->nr_data == 1) 1203c17af965SDavid Sterba has_qstripe = false; 1204c17af965SDavid Sterba else if (rbio->real_stripes - rbio->nr_data == 2) 1205c17af965SDavid Sterba has_qstripe = true; 1206c17af965SDavid Sterba else 120753b381b3SDavid Woodhouse BUG(); 120853b381b3SDavid Woodhouse 120953b381b3SDavid Woodhouse /* at this point we either have a full stripe, 121053b381b3SDavid Woodhouse * or we've read the full stripe from the drive. 121153b381b3SDavid Woodhouse * recalculate the parity and write the new results. 121253b381b3SDavid Woodhouse * 121353b381b3SDavid Woodhouse * We're not allowed to add any new bios to the 121453b381b3SDavid Woodhouse * bio list here, anyone else that wants to 121553b381b3SDavid Woodhouse * change this stripe needs to do their own rmw. 
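 *
 * Recalculating the parity here means: for raid5 the parity page is just
 * the xor of the data pages in the same horizontal stripe (copy the first
 * data page, then xor in the rest), and for raid6 raid6_call.gen_syndrome()
 * fills both the P and Q pages from the same array of mapped pages.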
121653b381b3SDavid Woodhouse */ 121753b381b3SDavid Woodhouse spin_lock_irq(&rbio->bio_list_lock); 121853b381b3SDavid Woodhouse set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 121953b381b3SDavid Woodhouse spin_unlock_irq(&rbio->bio_list_lock); 122053b381b3SDavid Woodhouse 1221b89e1b01SMiao Xie atomic_set(&rbio->error, 0); 122253b381b3SDavid Woodhouse 122353b381b3SDavid Woodhouse /* 122453b381b3SDavid Woodhouse * now that we've set rmw_locked, run through the 122553b381b3SDavid Woodhouse * bio list one last time and map the page pointers 12264ae10b3aSChris Mason * 12274ae10b3aSChris Mason * We don't cache full rbios because we're assuming 12284ae10b3aSChris Mason * the higher layers are unlikely to use this area of 12294ae10b3aSChris Mason * the disk again soon. If they do use it again, 12304ae10b3aSChris Mason * hopefully they will send another full bio. 123153b381b3SDavid Woodhouse */ 123253b381b3SDavid Woodhouse index_rbio_pages(rbio); 12334ae10b3aSChris Mason if (!rbio_is_full(rbio)) 12344ae10b3aSChris Mason cache_rbio_pages(rbio); 12354ae10b3aSChris Mason else 12364ae10b3aSChris Mason clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 123753b381b3SDavid Woodhouse 1238915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 123953b381b3SDavid Woodhouse struct page *p; 124053b381b3SDavid Woodhouse /* first collect one page from each data stripe */ 124153b381b3SDavid Woodhouse for (stripe = 0; stripe < nr_data; stripe++) { 124253b381b3SDavid Woodhouse p = page_in_rbio(rbio, stripe, pagenr, 0); 124353b381b3SDavid Woodhouse pointers[stripe] = kmap(p); 124453b381b3SDavid Woodhouse } 124553b381b3SDavid Woodhouse 124653b381b3SDavid Woodhouse /* then add the parity stripe */ 124753b381b3SDavid Woodhouse p = rbio_pstripe_page(rbio, pagenr); 124853b381b3SDavid Woodhouse SetPageUptodate(p); 124953b381b3SDavid Woodhouse pointers[stripe++] = kmap(p); 125053b381b3SDavid Woodhouse 1251c17af965SDavid Sterba if (has_qstripe) { 125253b381b3SDavid Woodhouse 125353b381b3SDavid Woodhouse /* 125453b381b3SDavid Woodhouse * raid6, add the qstripe and call the 125553b381b3SDavid Woodhouse * library function to fill in our p/q 125653b381b3SDavid Woodhouse */ 125753b381b3SDavid Woodhouse p = rbio_qstripe_page(rbio, pagenr); 125853b381b3SDavid Woodhouse SetPageUptodate(p); 125953b381b3SDavid Woodhouse pointers[stripe++] = kmap(p); 126053b381b3SDavid Woodhouse 12612c8cdd6eSMiao Xie raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, 126253b381b3SDavid Woodhouse pointers); 126353b381b3SDavid Woodhouse } else { 126453b381b3SDavid Woodhouse /* raid5 */ 126569d24804SDavid Sterba copy_page(pointers[nr_data], pointers[0]); 126609cbfeafSKirill A. Shutemov run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); 126753b381b3SDavid Woodhouse } 126853b381b3SDavid Woodhouse 126953b381b3SDavid Woodhouse 12702c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) 127153b381b3SDavid Woodhouse kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); 127253b381b3SDavid Woodhouse } 127353b381b3SDavid Woodhouse 127453b381b3SDavid Woodhouse /* 127553b381b3SDavid Woodhouse * time to start writing. Make bios for everything from the 127653b381b3SDavid Woodhouse * higher layers (the bio_list in our rbio) and our p/q. Ignore 127753b381b3SDavid Woodhouse * everything else. 
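 *
 * The writes cover every real stripe, and when the bbio carries
 * dev-replace targets (bbio->num_tgtdevs != 0) the same pages are queued a
 * second time for the replacement targets recorded in bbio->tgtdev_map[].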
127853b381b3SDavid Woodhouse */ 12792c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 1280915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 128153b381b3SDavid Woodhouse struct page *page; 128253b381b3SDavid Woodhouse if (stripe < rbio->nr_data) { 128353b381b3SDavid Woodhouse page = page_in_rbio(rbio, stripe, pagenr, 1); 128453b381b3SDavid Woodhouse if (!page) 128553b381b3SDavid Woodhouse continue; 128653b381b3SDavid Woodhouse } else { 128753b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, stripe, pagenr); 128853b381b3SDavid Woodhouse } 128953b381b3SDavid Woodhouse 129053b381b3SDavid Woodhouse ret = rbio_add_io_page(rbio, &bio_list, 129153b381b3SDavid Woodhouse page, stripe, pagenr, rbio->stripe_len); 129253b381b3SDavid Woodhouse if (ret) 129353b381b3SDavid Woodhouse goto cleanup; 129453b381b3SDavid Woodhouse } 129553b381b3SDavid Woodhouse } 129653b381b3SDavid Woodhouse 12972c8cdd6eSMiao Xie if (likely(!bbio->num_tgtdevs)) 12982c8cdd6eSMiao Xie goto write_data; 12992c8cdd6eSMiao Xie 13002c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 13012c8cdd6eSMiao Xie if (!bbio->tgtdev_map[stripe]) 13022c8cdd6eSMiao Xie continue; 13032c8cdd6eSMiao Xie 1304915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 13052c8cdd6eSMiao Xie struct page *page; 13062c8cdd6eSMiao Xie if (stripe < rbio->nr_data) { 13072c8cdd6eSMiao Xie page = page_in_rbio(rbio, stripe, pagenr, 1); 13082c8cdd6eSMiao Xie if (!page) 13092c8cdd6eSMiao Xie continue; 13102c8cdd6eSMiao Xie } else { 13112c8cdd6eSMiao Xie page = rbio_stripe_page(rbio, stripe, pagenr); 13122c8cdd6eSMiao Xie } 13132c8cdd6eSMiao Xie 13142c8cdd6eSMiao Xie ret = rbio_add_io_page(rbio, &bio_list, page, 13152c8cdd6eSMiao Xie rbio->bbio->tgtdev_map[stripe], 13162c8cdd6eSMiao Xie pagenr, rbio->stripe_len); 13172c8cdd6eSMiao Xie if (ret) 13182c8cdd6eSMiao Xie goto cleanup; 13192c8cdd6eSMiao Xie } 13202c8cdd6eSMiao Xie } 13212c8cdd6eSMiao Xie 13222c8cdd6eSMiao Xie write_data: 1323b89e1b01SMiao Xie atomic_set(&rbio->stripes_pending, bio_list_size(&bio_list)); 1324b89e1b01SMiao Xie BUG_ON(atomic_read(&rbio->stripes_pending) == 0); 132553b381b3SDavid Woodhouse 1326bf28a605SNikolay Borisov while ((bio = bio_list_pop(&bio_list))) { 132753b381b3SDavid Woodhouse bio->bi_private = rbio; 132853b381b3SDavid Woodhouse bio->bi_end_io = raid_write_end_io; 1329ebcc3263SDavid Sterba bio->bi_opf = REQ_OP_WRITE; 13304e49ea4aSMike Christie 13314e49ea4aSMike Christie submit_bio(bio); 133253b381b3SDavid Woodhouse } 133353b381b3SDavid Woodhouse return; 133453b381b3SDavid Woodhouse 133553b381b3SDavid Woodhouse cleanup: 133658efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 1337785884fcSLiu Bo 1338785884fcSLiu Bo while ((bio = bio_list_pop(&bio_list))) 1339785884fcSLiu Bo bio_put(bio); 134053b381b3SDavid Woodhouse } 134153b381b3SDavid Woodhouse 134253b381b3SDavid Woodhouse /* 134353b381b3SDavid Woodhouse * helper to find the stripe number for a given bio. Used to figure out which 134453b381b3SDavid Woodhouse * stripe has failed. This expects the bio to correspond to a physical disk, 134553b381b3SDavid Woodhouse * so it looks up based on physical sector numbers. 
134653b381b3SDavid Woodhouse */ 134753b381b3SDavid Woodhouse static int find_bio_stripe(struct btrfs_raid_bio *rbio, 134853b381b3SDavid Woodhouse struct bio *bio) 134953b381b3SDavid Woodhouse { 13504f024f37SKent Overstreet u64 physical = bio->bi_iter.bi_sector; 135153b381b3SDavid Woodhouse int i; 135253b381b3SDavid Woodhouse struct btrfs_bio_stripe *stripe; 135353b381b3SDavid Woodhouse 135453b381b3SDavid Woodhouse physical <<= 9; 135553b381b3SDavid Woodhouse 135653b381b3SDavid Woodhouse for (i = 0; i < rbio->bbio->num_stripes; i++) { 135753b381b3SDavid Woodhouse stripe = &rbio->bbio->stripes[i]; 135883025863SNikolay Borisov if (in_range(physical, stripe->physical, rbio->stripe_len) && 1359*309dca30SChristoph Hellwig stripe->dev->bdev && bio->bi_bdev == stripe->dev->bdev) { 136053b381b3SDavid Woodhouse return i; 136153b381b3SDavid Woodhouse } 136253b381b3SDavid Woodhouse } 136353b381b3SDavid Woodhouse return -1; 136453b381b3SDavid Woodhouse } 136553b381b3SDavid Woodhouse 136653b381b3SDavid Woodhouse /* 136753b381b3SDavid Woodhouse * helper to find the stripe number for a given 136853b381b3SDavid Woodhouse * bio (before mapping). Used to figure out which stripe has 136953b381b3SDavid Woodhouse * failed. This looks up based on logical block numbers. 137053b381b3SDavid Woodhouse */ 137153b381b3SDavid Woodhouse static int find_logical_bio_stripe(struct btrfs_raid_bio *rbio, 137253b381b3SDavid Woodhouse struct bio *bio) 137353b381b3SDavid Woodhouse { 13741201b58bSDavid Sterba u64 logical = bio->bi_iter.bi_sector << 9; 137553b381b3SDavid Woodhouse int i; 137653b381b3SDavid Woodhouse 137753b381b3SDavid Woodhouse for (i = 0; i < rbio->nr_data; i++) { 137883025863SNikolay Borisov u64 stripe_start = rbio->bbio->raid_map[i]; 137983025863SNikolay Borisov 138083025863SNikolay Borisov if (in_range(logical, stripe_start, rbio->stripe_len)) 138153b381b3SDavid Woodhouse return i; 138253b381b3SDavid Woodhouse } 138353b381b3SDavid Woodhouse return -1; 138453b381b3SDavid Woodhouse } 138553b381b3SDavid Woodhouse 138653b381b3SDavid Woodhouse /* 138753b381b3SDavid Woodhouse * returns -EIO if we had too many failures 138853b381b3SDavid Woodhouse */ 138953b381b3SDavid Woodhouse static int fail_rbio_index(struct btrfs_raid_bio *rbio, int failed) 139053b381b3SDavid Woodhouse { 139153b381b3SDavid Woodhouse unsigned long flags; 139253b381b3SDavid Woodhouse int ret = 0; 139353b381b3SDavid Woodhouse 139453b381b3SDavid Woodhouse spin_lock_irqsave(&rbio->bio_list_lock, flags); 139553b381b3SDavid Woodhouse 139653b381b3SDavid Woodhouse /* we already know this stripe is bad, move on */ 139753b381b3SDavid Woodhouse if (rbio->faila == failed || rbio->failb == failed) 139853b381b3SDavid Woodhouse goto out; 139953b381b3SDavid Woodhouse 140053b381b3SDavid Woodhouse if (rbio->faila == -1) { 140153b381b3SDavid Woodhouse /* first failure on this rbio */ 140253b381b3SDavid Woodhouse rbio->faila = failed; 1403b89e1b01SMiao Xie atomic_inc(&rbio->error); 140453b381b3SDavid Woodhouse } else if (rbio->failb == -1) { 140553b381b3SDavid Woodhouse /* second failure on this rbio */ 140653b381b3SDavid Woodhouse rbio->failb = failed; 1407b89e1b01SMiao Xie atomic_inc(&rbio->error); 140853b381b3SDavid Woodhouse } else { 140953b381b3SDavid Woodhouse ret = -EIO; 141053b381b3SDavid Woodhouse } 141153b381b3SDavid Woodhouse out: 141253b381b3SDavid Woodhouse spin_unlock_irqrestore(&rbio->bio_list_lock, flags); 141353b381b3SDavid Woodhouse 141453b381b3SDavid Woodhouse return ret; 141553b381b3SDavid Woodhouse } 141653b381b3SDavid Woodhouse 
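/*
 * A self-contained sketch of the address math used by
 * find_logical_bio_stripe() above and by index_rbio_pages(): the full
 * stripe starts at raid_map[0], data stripe i covers the logical range
 * [raid_map[i], raid_map[i] + stripe_len), and bio pages are indexed by
 * their page offset from raid_map[0].  The helper below is illustrative
 * only; it is not part of this file and nothing calls it.
 */
static inline int example_logical_to_page_index(const u64 *raid_map,
						int nr_data, u64 stripe_len,
						u64 logical)
{
	u64 stripe_offset;

	/* outside the data portion of this full stripe */
	if (logical < raid_map[0] ||
	    logical >= raid_map[0] + nr_data * stripe_len)
		return -1;

	stripe_offset = logical - raid_map[0];
	/* stripe_offset / stripe_len would give the data stripe number */
	return stripe_offset >> PAGE_SHIFT;
}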
141753b381b3SDavid Woodhouse /* 141853b381b3SDavid Woodhouse * helper to fail a stripe based on a physical disk 141953b381b3SDavid Woodhouse * bio. 142053b381b3SDavid Woodhouse */ 142153b381b3SDavid Woodhouse static int fail_bio_stripe(struct btrfs_raid_bio *rbio, 142253b381b3SDavid Woodhouse struct bio *bio) 142353b381b3SDavid Woodhouse { 142453b381b3SDavid Woodhouse int failed = find_bio_stripe(rbio, bio); 142553b381b3SDavid Woodhouse 142653b381b3SDavid Woodhouse if (failed < 0) 142753b381b3SDavid Woodhouse return -EIO; 142853b381b3SDavid Woodhouse 142953b381b3SDavid Woodhouse return fail_rbio_index(rbio, failed); 143053b381b3SDavid Woodhouse } 143153b381b3SDavid Woodhouse 143253b381b3SDavid Woodhouse /* 143353b381b3SDavid Woodhouse * this sets each page in the bio uptodate. It should only be used on private 143453b381b3SDavid Woodhouse * rbio pages, nothing that comes in from the higher layers 143553b381b3SDavid Woodhouse */ 143653b381b3SDavid Woodhouse static void set_bio_pages_uptodate(struct bio *bio) 143753b381b3SDavid Woodhouse { 14380198e5b7SLiu Bo struct bio_vec *bvec; 14396dc4f100SMing Lei struct bvec_iter_all iter_all; 144053b381b3SDavid Woodhouse 14410198e5b7SLiu Bo ASSERT(!bio_flagged(bio, BIO_CLONED)); 14426592e58cSFilipe Manana 14432b070cfeSChristoph Hellwig bio_for_each_segment_all(bvec, bio, iter_all) 14440198e5b7SLiu Bo SetPageUptodate(bvec->bv_page); 144553b381b3SDavid Woodhouse } 144653b381b3SDavid Woodhouse 144753b381b3SDavid Woodhouse /* 144853b381b3SDavid Woodhouse * end io for the read phase of the rmw cycle. All the bios here are physical 144953b381b3SDavid Woodhouse * stripe bios we've read from the disk so we can recalculate the parity of the 145053b381b3SDavid Woodhouse * stripe. 145153b381b3SDavid Woodhouse * 145253b381b3SDavid Woodhouse * This will usually kick off finish_rmw once all the bios are read in, but it 145353b381b3SDavid Woodhouse * may trigger parity reconstruction if we had any errors along the way 145453b381b3SDavid Woodhouse */ 14554246a0b6SChristoph Hellwig static void raid_rmw_end_io(struct bio *bio) 145653b381b3SDavid Woodhouse { 145753b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio = bio->bi_private; 145853b381b3SDavid Woodhouse 14594e4cbee9SChristoph Hellwig if (bio->bi_status) 146053b381b3SDavid Woodhouse fail_bio_stripe(rbio, bio); 146153b381b3SDavid Woodhouse else 146253b381b3SDavid Woodhouse set_bio_pages_uptodate(bio); 146353b381b3SDavid Woodhouse 146453b381b3SDavid Woodhouse bio_put(bio); 146553b381b3SDavid Woodhouse 1466b89e1b01SMiao Xie if (!atomic_dec_and_test(&rbio->stripes_pending)) 146753b381b3SDavid Woodhouse return; 146853b381b3SDavid Woodhouse 1469b89e1b01SMiao Xie if (atomic_read(&rbio->error) > rbio->bbio->max_errors) 147053b381b3SDavid Woodhouse goto cleanup; 147153b381b3SDavid Woodhouse 147253b381b3SDavid Woodhouse /* 147353b381b3SDavid Woodhouse * this will normally call finish_rmw to start our write 147453b381b3SDavid Woodhouse * but if there are any failed stripes we'll reconstruct 147553b381b3SDavid Woodhouse * from parity first 147653b381b3SDavid Woodhouse */ 147753b381b3SDavid Woodhouse validate_rbio_for_rmw(rbio); 147853b381b3SDavid Woodhouse return; 147953b381b3SDavid Woodhouse 148053b381b3SDavid Woodhouse cleanup: 148153b381b3SDavid Woodhouse 148258efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 148353b381b3SDavid Woodhouse } 148453b381b3SDavid Woodhouse 148553b381b3SDavid Woodhouse /* 148653b381b3SDavid Woodhouse * the stripe must be locked by the caller. 
It will 148753b381b3SDavid Woodhouse * unlock after all the writes are done 148853b381b3SDavid Woodhouse */ 148953b381b3SDavid Woodhouse static int raid56_rmw_stripe(struct btrfs_raid_bio *rbio) 149053b381b3SDavid Woodhouse { 149153b381b3SDavid Woodhouse int bios_to_read = 0; 149253b381b3SDavid Woodhouse struct bio_list bio_list; 149353b381b3SDavid Woodhouse int ret; 149453b381b3SDavid Woodhouse int pagenr; 149553b381b3SDavid Woodhouse int stripe; 149653b381b3SDavid Woodhouse struct bio *bio; 149753b381b3SDavid Woodhouse 149853b381b3SDavid Woodhouse bio_list_init(&bio_list); 149953b381b3SDavid Woodhouse 150053b381b3SDavid Woodhouse ret = alloc_rbio_pages(rbio); 150153b381b3SDavid Woodhouse if (ret) 150253b381b3SDavid Woodhouse goto cleanup; 150353b381b3SDavid Woodhouse 150453b381b3SDavid Woodhouse index_rbio_pages(rbio); 150553b381b3SDavid Woodhouse 1506b89e1b01SMiao Xie atomic_set(&rbio->error, 0); 150753b381b3SDavid Woodhouse /* 150853b381b3SDavid Woodhouse * build a list of bios to read all the missing parts of this 150953b381b3SDavid Woodhouse * stripe 151053b381b3SDavid Woodhouse */ 151153b381b3SDavid Woodhouse for (stripe = 0; stripe < rbio->nr_data; stripe++) { 1512915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 151353b381b3SDavid Woodhouse struct page *page; 151453b381b3SDavid Woodhouse /* 151553b381b3SDavid Woodhouse * we want to find all the pages missing from 151653b381b3SDavid Woodhouse * the rbio and read them from the disk. If 151753b381b3SDavid Woodhouse * page_in_rbio finds a page in the bio list 151853b381b3SDavid Woodhouse * we don't need to read it off the stripe. 151953b381b3SDavid Woodhouse */ 152053b381b3SDavid Woodhouse page = page_in_rbio(rbio, stripe, pagenr, 1); 152153b381b3SDavid Woodhouse if (page) 152253b381b3SDavid Woodhouse continue; 152353b381b3SDavid Woodhouse 152453b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, stripe, pagenr); 15254ae10b3aSChris Mason /* 15264ae10b3aSChris Mason * the bio cache may have handed us an uptodate 15274ae10b3aSChris Mason * page. If so, be happy and use it 15284ae10b3aSChris Mason */ 15294ae10b3aSChris Mason if (PageUptodate(page)) 15304ae10b3aSChris Mason continue; 15314ae10b3aSChris Mason 153253b381b3SDavid Woodhouse ret = rbio_add_io_page(rbio, &bio_list, page, 153353b381b3SDavid Woodhouse stripe, pagenr, rbio->stripe_len); 153453b381b3SDavid Woodhouse if (ret) 153553b381b3SDavid Woodhouse goto cleanup; 153653b381b3SDavid Woodhouse } 153753b381b3SDavid Woodhouse } 153853b381b3SDavid Woodhouse 153953b381b3SDavid Woodhouse bios_to_read = bio_list_size(&bio_list); 154053b381b3SDavid Woodhouse if (!bios_to_read) { 154153b381b3SDavid Woodhouse /* 154253b381b3SDavid Woodhouse * this can happen if others have merged with 154353b381b3SDavid Woodhouse * us, it means there is nothing left to read. 154453b381b3SDavid Woodhouse * But if there are missing devices it may not be 154553b381b3SDavid Woodhouse * safe to do the full stripe write yet. 154653b381b3SDavid Woodhouse */ 154753b381b3SDavid Woodhouse goto finish; 154853b381b3SDavid Woodhouse } 154953b381b3SDavid Woodhouse 155053b381b3SDavid Woodhouse /* 155153b381b3SDavid Woodhouse * the bbio may be freed once we submit the last bio. 
Make sure 155253b381b3SDavid Woodhouse * not to touch it after that 155353b381b3SDavid Woodhouse */ 1554b89e1b01SMiao Xie atomic_set(&rbio->stripes_pending, bios_to_read); 1555bf28a605SNikolay Borisov while ((bio = bio_list_pop(&bio_list))) { 155653b381b3SDavid Woodhouse bio->bi_private = rbio; 155753b381b3SDavid Woodhouse bio->bi_end_io = raid_rmw_end_io; 1558ebcc3263SDavid Sterba bio->bi_opf = REQ_OP_READ; 155953b381b3SDavid Woodhouse 15600b246afaSJeff Mahoney btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 156153b381b3SDavid Woodhouse 15624e49ea4aSMike Christie submit_bio(bio); 156353b381b3SDavid Woodhouse } 156453b381b3SDavid Woodhouse /* the actual write will happen once the reads are done */ 156553b381b3SDavid Woodhouse return 0; 156653b381b3SDavid Woodhouse 156753b381b3SDavid Woodhouse cleanup: 156858efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 1569785884fcSLiu Bo 1570785884fcSLiu Bo while ((bio = bio_list_pop(&bio_list))) 1571785884fcSLiu Bo bio_put(bio); 1572785884fcSLiu Bo 157353b381b3SDavid Woodhouse return -EIO; 157453b381b3SDavid Woodhouse 157553b381b3SDavid Woodhouse finish: 157653b381b3SDavid Woodhouse validate_rbio_for_rmw(rbio); 157753b381b3SDavid Woodhouse return 0; 157853b381b3SDavid Woodhouse } 157953b381b3SDavid Woodhouse 158053b381b3SDavid Woodhouse /* 158153b381b3SDavid Woodhouse * if the upper layers pass in a full stripe, we thank them by only allocating 158253b381b3SDavid Woodhouse * enough pages to hold the parity, and sending it all down quickly. 158353b381b3SDavid Woodhouse */ 158453b381b3SDavid Woodhouse static int full_stripe_write(struct btrfs_raid_bio *rbio) 158553b381b3SDavid Woodhouse { 158653b381b3SDavid Woodhouse int ret; 158753b381b3SDavid Woodhouse 158853b381b3SDavid Woodhouse ret = alloc_rbio_parity_pages(rbio); 15893cd846d1SMiao Xie if (ret) { 15903cd846d1SMiao Xie __free_raid_bio(rbio); 159153b381b3SDavid Woodhouse return ret; 15923cd846d1SMiao Xie } 159353b381b3SDavid Woodhouse 159453b381b3SDavid Woodhouse ret = lock_stripe_add(rbio); 159553b381b3SDavid Woodhouse if (ret == 0) 159653b381b3SDavid Woodhouse finish_rmw(rbio); 159753b381b3SDavid Woodhouse return 0; 159853b381b3SDavid Woodhouse } 159953b381b3SDavid Woodhouse 160053b381b3SDavid Woodhouse /* 160153b381b3SDavid Woodhouse * partial stripe writes get handed over to async helpers. 160253b381b3SDavid Woodhouse * We're really hoping to merge a few more writes into this 160353b381b3SDavid Woodhouse * rbio before calculating new parity 160453b381b3SDavid Woodhouse */ 160553b381b3SDavid Woodhouse static int partial_stripe_write(struct btrfs_raid_bio *rbio) 160653b381b3SDavid Woodhouse { 160753b381b3SDavid Woodhouse int ret; 160853b381b3SDavid Woodhouse 160953b381b3SDavid Woodhouse ret = lock_stripe_add(rbio); 161053b381b3SDavid Woodhouse if (ret == 0) 1611cf6a4a75SDavid Sterba start_async_work(rbio, rmw_work); 161253b381b3SDavid Woodhouse return 0; 161353b381b3SDavid Woodhouse } 161453b381b3SDavid Woodhouse 161553b381b3SDavid Woodhouse /* 161653b381b3SDavid Woodhouse * sometimes while we were reading from the drive to 161753b381b3SDavid Woodhouse * recalculate parity, enough new bios come into create 161853b381b3SDavid Woodhouse * a full stripe. 
So we do a check here to see if we can 161953b381b3SDavid Woodhouse * go directly to finish_rmw 162053b381b3SDavid Woodhouse */ 162153b381b3SDavid Woodhouse static int __raid56_parity_write(struct btrfs_raid_bio *rbio) 162253b381b3SDavid Woodhouse { 162353b381b3SDavid Woodhouse /* head off into rmw land if we don't have a full stripe */ 162453b381b3SDavid Woodhouse if (!rbio_is_full(rbio)) 162553b381b3SDavid Woodhouse return partial_stripe_write(rbio); 162653b381b3SDavid Woodhouse return full_stripe_write(rbio); 162753b381b3SDavid Woodhouse } 162853b381b3SDavid Woodhouse 162953b381b3SDavid Woodhouse /* 16306ac0f488SChris Mason * We use plugging call backs to collect full stripes. 16316ac0f488SChris Mason * Any time we get a partial stripe write while plugged 16326ac0f488SChris Mason * we collect it into a list. When the unplug comes down, 16336ac0f488SChris Mason * we sort the list by logical block number and merge 16346ac0f488SChris Mason * everything we can into the same rbios 16356ac0f488SChris Mason */ 16366ac0f488SChris Mason struct btrfs_plug_cb { 16376ac0f488SChris Mason struct blk_plug_cb cb; 16386ac0f488SChris Mason struct btrfs_fs_info *info; 16396ac0f488SChris Mason struct list_head rbio_list; 16406ac0f488SChris Mason struct btrfs_work work; 16416ac0f488SChris Mason }; 16426ac0f488SChris Mason 16436ac0f488SChris Mason /* 16446ac0f488SChris Mason * rbios on the plug list are sorted for easier merging. 16456ac0f488SChris Mason */ 16466ac0f488SChris Mason static int plug_cmp(void *priv, struct list_head *a, struct list_head *b) 16476ac0f488SChris Mason { 16486ac0f488SChris Mason struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio, 16496ac0f488SChris Mason plug_list); 16506ac0f488SChris Mason struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio, 16516ac0f488SChris Mason plug_list); 16524f024f37SKent Overstreet u64 a_sector = ra->bio_list.head->bi_iter.bi_sector; 16534f024f37SKent Overstreet u64 b_sector = rb->bio_list.head->bi_iter.bi_sector; 16546ac0f488SChris Mason 16556ac0f488SChris Mason if (a_sector < b_sector) 16566ac0f488SChris Mason return -1; 16576ac0f488SChris Mason if (a_sector > b_sector) 16586ac0f488SChris Mason return 1; 16596ac0f488SChris Mason return 0; 16606ac0f488SChris Mason } 16616ac0f488SChris Mason 16626ac0f488SChris Mason static void run_plug(struct btrfs_plug_cb *plug) 16636ac0f488SChris Mason { 16646ac0f488SChris Mason struct btrfs_raid_bio *cur; 16656ac0f488SChris Mason struct btrfs_raid_bio *last = NULL; 16666ac0f488SChris Mason 16676ac0f488SChris Mason /* 16686ac0f488SChris Mason * sort our plug list then try to merge 16696ac0f488SChris Mason * everything we can in hopes of creating full 16706ac0f488SChris Mason * stripes. 
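 *
 * Rbios that are already full are sent straight down with
 * full_stripe_write(), neighbours that rbio_can_merge() accepts are merged
 * (and the duplicate freed), and whatever is left goes through
 * __raid56_parity_write().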
16716ac0f488SChris Mason */ 16726ac0f488SChris Mason list_sort(NULL, &plug->rbio_list, plug_cmp); 16736ac0f488SChris Mason while (!list_empty(&plug->rbio_list)) { 16746ac0f488SChris Mason cur = list_entry(plug->rbio_list.next, 16756ac0f488SChris Mason struct btrfs_raid_bio, plug_list); 16766ac0f488SChris Mason list_del_init(&cur->plug_list); 16776ac0f488SChris Mason 16786ac0f488SChris Mason if (rbio_is_full(cur)) { 1679c7b562c5SDavid Sterba int ret; 1680c7b562c5SDavid Sterba 16816ac0f488SChris Mason /* we have a full stripe, send it down */ 1682c7b562c5SDavid Sterba ret = full_stripe_write(cur); 1683c7b562c5SDavid Sterba BUG_ON(ret); 16846ac0f488SChris Mason continue; 16856ac0f488SChris Mason } 16866ac0f488SChris Mason if (last) { 16876ac0f488SChris Mason if (rbio_can_merge(last, cur)) { 16886ac0f488SChris Mason merge_rbio(last, cur); 16896ac0f488SChris Mason __free_raid_bio(cur); 16906ac0f488SChris Mason continue; 16916ac0f488SChris Mason 16926ac0f488SChris Mason } 16936ac0f488SChris Mason __raid56_parity_write(last); 16946ac0f488SChris Mason } 16956ac0f488SChris Mason last = cur; 16966ac0f488SChris Mason } 16976ac0f488SChris Mason if (last) { 16986ac0f488SChris Mason __raid56_parity_write(last); 16996ac0f488SChris Mason } 17006ac0f488SChris Mason kfree(plug); 17016ac0f488SChris Mason } 17026ac0f488SChris Mason 17036ac0f488SChris Mason /* 17046ac0f488SChris Mason * if the unplug comes from schedule, we have to push the 17056ac0f488SChris Mason * work off to a helper thread 17066ac0f488SChris Mason */ 17076ac0f488SChris Mason static void unplug_work(struct btrfs_work *work) 17086ac0f488SChris Mason { 17096ac0f488SChris Mason struct btrfs_plug_cb *plug; 17106ac0f488SChris Mason plug = container_of(work, struct btrfs_plug_cb, work); 17116ac0f488SChris Mason run_plug(plug); 17126ac0f488SChris Mason } 17136ac0f488SChris Mason 17146ac0f488SChris Mason static void btrfs_raid_unplug(struct blk_plug_cb *cb, bool from_schedule) 17156ac0f488SChris Mason { 17166ac0f488SChris Mason struct btrfs_plug_cb *plug; 17176ac0f488SChris Mason plug = container_of(cb, struct btrfs_plug_cb, cb); 17186ac0f488SChris Mason 17196ac0f488SChris Mason if (from_schedule) { 1720a0cac0ecSOmar Sandoval btrfs_init_work(&plug->work, unplug_work, NULL, NULL); 1721d05a33acSQu Wenruo btrfs_queue_work(plug->info->rmw_workers, 17226ac0f488SChris Mason &plug->work); 17236ac0f488SChris Mason return; 17246ac0f488SChris Mason } 17256ac0f488SChris Mason run_plug(plug); 17266ac0f488SChris Mason } 17276ac0f488SChris Mason 17286ac0f488SChris Mason /* 172953b381b3SDavid Woodhouse * our main entry point for writes from the rest of the FS. 
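 *
 * Full stripes are sent down immediately via full_stripe_write().  Partial
 * stripes are queued on the current block plug when one is active, so that
 * later writes can be merged into the same rbio at unplug time; otherwise
 * they go through __raid56_parity_write() right away.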
173053b381b3SDavid Woodhouse */ 17312ff7e61eSJeff Mahoney int raid56_parity_write(struct btrfs_fs_info *fs_info, struct bio *bio, 17328e5cfb55SZhao Lei struct btrfs_bio *bbio, u64 stripe_len) 173353b381b3SDavid Woodhouse { 173453b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio; 17356ac0f488SChris Mason struct btrfs_plug_cb *plug = NULL; 17366ac0f488SChris Mason struct blk_plug_cb *cb; 17374245215dSMiao Xie int ret; 173853b381b3SDavid Woodhouse 17392ff7e61eSJeff Mahoney rbio = alloc_rbio(fs_info, bbio, stripe_len); 1740af8e2d1dSMiao Xie if (IS_ERR(rbio)) { 17416e9606d2SZhao Lei btrfs_put_bbio(bbio); 174253b381b3SDavid Woodhouse return PTR_ERR(rbio); 1743af8e2d1dSMiao Xie } 174453b381b3SDavid Woodhouse bio_list_add(&rbio->bio_list, bio); 17454f024f37SKent Overstreet rbio->bio_list_bytes = bio->bi_iter.bi_size; 17461b94b556SMiao Xie rbio->operation = BTRFS_RBIO_WRITE; 17476ac0f488SChris Mason 17480b246afaSJeff Mahoney btrfs_bio_counter_inc_noblocked(fs_info); 17494245215dSMiao Xie rbio->generic_bio_cnt = 1; 17504245215dSMiao Xie 17516ac0f488SChris Mason /* 17526ac0f488SChris Mason * don't plug on full rbios, just get them out the door 17536ac0f488SChris Mason * as quickly as we can 17546ac0f488SChris Mason */ 17554245215dSMiao Xie if (rbio_is_full(rbio)) { 17564245215dSMiao Xie ret = full_stripe_write(rbio); 17574245215dSMiao Xie if (ret) 17580b246afaSJeff Mahoney btrfs_bio_counter_dec(fs_info); 17594245215dSMiao Xie return ret; 17604245215dSMiao Xie } 17616ac0f488SChris Mason 17620b246afaSJeff Mahoney cb = blk_check_plugged(btrfs_raid_unplug, fs_info, sizeof(*plug)); 17636ac0f488SChris Mason if (cb) { 17646ac0f488SChris Mason plug = container_of(cb, struct btrfs_plug_cb, cb); 17656ac0f488SChris Mason if (!plug->info) { 17660b246afaSJeff Mahoney plug->info = fs_info; 17676ac0f488SChris Mason INIT_LIST_HEAD(&plug->rbio_list); 17686ac0f488SChris Mason } 17696ac0f488SChris Mason list_add_tail(&rbio->plug_list, &plug->rbio_list); 17704245215dSMiao Xie ret = 0; 17716ac0f488SChris Mason } else { 17724245215dSMiao Xie ret = __raid56_parity_write(rbio); 17734245215dSMiao Xie if (ret) 17740b246afaSJeff Mahoney btrfs_bio_counter_dec(fs_info); 177553b381b3SDavid Woodhouse } 17764245215dSMiao Xie return ret; 17776ac0f488SChris Mason } 177853b381b3SDavid Woodhouse 177953b381b3SDavid Woodhouse /* 178053b381b3SDavid Woodhouse * all parity reconstruction happens here. We've read in everything 178153b381b3SDavid Woodhouse * we can find from the drives and this does the heavy lifting of 178253b381b3SDavid Woodhouse * sorting the good from the bad. 
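 *
 * A single failed data stripe is rebuilt from P: copy the parity page into
 * the failed slot and xor in the surviving data pages.  On raid6, a failed
 * data stripe plus a failed P stripe goes through raid6_datap_recov(), two
 * failed data stripes go through raid6_2data_recov(), and when the second
 * failure is the Q stripe the plain P reconstruction is enough.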
178353b381b3SDavid Woodhouse */ 178453b381b3SDavid Woodhouse static void __raid_recover_end_io(struct btrfs_raid_bio *rbio) 178553b381b3SDavid Woodhouse { 178653b381b3SDavid Woodhouse int pagenr, stripe; 178753b381b3SDavid Woodhouse void **pointers; 178853b381b3SDavid Woodhouse int faila = -1, failb = -1; 178953b381b3SDavid Woodhouse struct page *page; 179058efbc9fSOmar Sandoval blk_status_t err; 179153b381b3SDavid Woodhouse int i; 179253b381b3SDavid Woodhouse 179331e818feSDavid Sterba pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS); 179453b381b3SDavid Woodhouse if (!pointers) { 179558efbc9fSOmar Sandoval err = BLK_STS_RESOURCE; 179653b381b3SDavid Woodhouse goto cleanup_io; 179753b381b3SDavid Woodhouse } 179853b381b3SDavid Woodhouse 179953b381b3SDavid Woodhouse faila = rbio->faila; 180053b381b3SDavid Woodhouse failb = rbio->failb; 180153b381b3SDavid Woodhouse 1802b4ee1782SOmar Sandoval if (rbio->operation == BTRFS_RBIO_READ_REBUILD || 1803b4ee1782SOmar Sandoval rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { 180453b381b3SDavid Woodhouse spin_lock_irq(&rbio->bio_list_lock); 180553b381b3SDavid Woodhouse set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags); 180653b381b3SDavid Woodhouse spin_unlock_irq(&rbio->bio_list_lock); 180753b381b3SDavid Woodhouse } 180853b381b3SDavid Woodhouse 180953b381b3SDavid Woodhouse index_rbio_pages(rbio); 181053b381b3SDavid Woodhouse 1811915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 18125a6ac9eaSMiao Xie /* 18135a6ac9eaSMiao Xie * Now we just use bitmap to mark the horizontal stripes in 18145a6ac9eaSMiao Xie * which we have data when doing parity scrub. 18155a6ac9eaSMiao Xie */ 18165a6ac9eaSMiao Xie if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB && 18175a6ac9eaSMiao Xie !test_bit(pagenr, rbio->dbitmap)) 18185a6ac9eaSMiao Xie continue; 18195a6ac9eaSMiao Xie 182053b381b3SDavid Woodhouse /* setup our array of pointers with pages 182153b381b3SDavid Woodhouse * from each stripe 182253b381b3SDavid Woodhouse */ 18232c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 182453b381b3SDavid Woodhouse /* 182553b381b3SDavid Woodhouse * if we're rebuilding a read, we have to use 182653b381b3SDavid Woodhouse * pages from the bio list 182753b381b3SDavid Woodhouse */ 1828b4ee1782SOmar Sandoval if ((rbio->operation == BTRFS_RBIO_READ_REBUILD || 1829b4ee1782SOmar Sandoval rbio->operation == BTRFS_RBIO_REBUILD_MISSING) && 183053b381b3SDavid Woodhouse (stripe == faila || stripe == failb)) { 183153b381b3SDavid Woodhouse page = page_in_rbio(rbio, stripe, pagenr, 0); 183253b381b3SDavid Woodhouse } else { 183353b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, stripe, pagenr); 183453b381b3SDavid Woodhouse } 183553b381b3SDavid Woodhouse pointers[stripe] = kmap(page); 183653b381b3SDavid Woodhouse } 183753b381b3SDavid Woodhouse 183853b381b3SDavid Woodhouse /* all raid6 handling here */ 183910f11900SZhao Lei if (rbio->bbio->map_type & BTRFS_BLOCK_GROUP_RAID6) { 184053b381b3SDavid Woodhouse /* 184153b381b3SDavid Woodhouse * single failure, rebuild from parity raid5 184253b381b3SDavid Woodhouse * style 184353b381b3SDavid Woodhouse */ 184453b381b3SDavid Woodhouse if (failb < 0) { 184553b381b3SDavid Woodhouse if (faila == rbio->nr_data) { 184653b381b3SDavid Woodhouse /* 184753b381b3SDavid Woodhouse * Just the P stripe has failed, without 184853b381b3SDavid Woodhouse * a bad data or Q stripe. 184953b381b3SDavid Woodhouse * TODO, we should redo the xor here. 
185053b381b3SDavid Woodhouse */ 185158efbc9fSOmar Sandoval err = BLK_STS_IOERR; 185253b381b3SDavid Woodhouse goto cleanup; 185353b381b3SDavid Woodhouse } 185453b381b3SDavid Woodhouse /* 185553b381b3SDavid Woodhouse * a single failure in raid6 is rebuilt 185653b381b3SDavid Woodhouse * in the pstripe code below 185753b381b3SDavid Woodhouse */ 185853b381b3SDavid Woodhouse goto pstripe; 185953b381b3SDavid Woodhouse } 186053b381b3SDavid Woodhouse 186153b381b3SDavid Woodhouse /* make sure our ps and qs are in order */ 1862b7d2083aSNikolay Borisov if (faila > failb) 1863b7d2083aSNikolay Borisov swap(faila, failb); 186453b381b3SDavid Woodhouse 186553b381b3SDavid Woodhouse /* if the q stripe is failed, do a pstripe reconstruction 186653b381b3SDavid Woodhouse * from the xors. 186753b381b3SDavid Woodhouse * If both the q stripe and the P stripe are failed, we're 186853b381b3SDavid Woodhouse * here due to a crc mismatch and we can't give them the 186953b381b3SDavid Woodhouse * data they want 187053b381b3SDavid Woodhouse */ 18718e5cfb55SZhao Lei if (rbio->bbio->raid_map[failb] == RAID6_Q_STRIPE) { 18728e5cfb55SZhao Lei if (rbio->bbio->raid_map[faila] == 18738e5cfb55SZhao Lei RAID5_P_STRIPE) { 187458efbc9fSOmar Sandoval err = BLK_STS_IOERR; 187553b381b3SDavid Woodhouse goto cleanup; 187653b381b3SDavid Woodhouse } 187753b381b3SDavid Woodhouse /* 187853b381b3SDavid Woodhouse * otherwise we have one bad data stripe and 187953b381b3SDavid Woodhouse * a good P stripe. raid5! 188053b381b3SDavid Woodhouse */ 188153b381b3SDavid Woodhouse goto pstripe; 188253b381b3SDavid Woodhouse } 188353b381b3SDavid Woodhouse 18848e5cfb55SZhao Lei if (rbio->bbio->raid_map[failb] == RAID5_P_STRIPE) { 18852c8cdd6eSMiao Xie raid6_datap_recov(rbio->real_stripes, 188653b381b3SDavid Woodhouse PAGE_SIZE, faila, pointers); 188753b381b3SDavid Woodhouse } else { 18882c8cdd6eSMiao Xie raid6_2data_recov(rbio->real_stripes, 188953b381b3SDavid Woodhouse PAGE_SIZE, faila, failb, 189053b381b3SDavid Woodhouse pointers); 189153b381b3SDavid Woodhouse } 189253b381b3SDavid Woodhouse } else { 189353b381b3SDavid Woodhouse void *p; 189453b381b3SDavid Woodhouse 189553b381b3SDavid Woodhouse /* rebuild from P stripe here (raid5 or raid6) */ 189653b381b3SDavid Woodhouse BUG_ON(failb != -1); 189753b381b3SDavid Woodhouse pstripe: 189853b381b3SDavid Woodhouse /* Copy parity block into failed block to start with */ 189969d24804SDavid Sterba copy_page(pointers[faila], pointers[rbio->nr_data]); 190053b381b3SDavid Woodhouse 190153b381b3SDavid Woodhouse /* rearrange the pointer array */ 190253b381b3SDavid Woodhouse p = pointers[faila]; 190353b381b3SDavid Woodhouse for (stripe = faila; stripe < rbio->nr_data - 1; stripe++) 190453b381b3SDavid Woodhouse pointers[stripe] = pointers[stripe + 1]; 190553b381b3SDavid Woodhouse pointers[rbio->nr_data - 1] = p; 190653b381b3SDavid Woodhouse 190753b381b3SDavid Woodhouse /* xor in the rest */ 190809cbfeafSKirill A. Shutemov run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE); 190953b381b3SDavid Woodhouse } 191053b381b3SDavid Woodhouse /* if we're doing this rebuild as part of an rmw, go through 191153b381b3SDavid Woodhouse * and set all of our private rbio pages in the 191253b381b3SDavid Woodhouse * failed stripes as uptodate. This way finish_rmw will 191353b381b3SDavid Woodhouse * know they can be trusted. 
If this was a read reconstruction, 191453b381b3SDavid Woodhouse * other endio functions will fiddle the uptodate bits 191553b381b3SDavid Woodhouse */ 19161b94b556SMiao Xie if (rbio->operation == BTRFS_RBIO_WRITE) { 1917915e2290SZhao Lei for (i = 0; i < rbio->stripe_npages; i++) { 191853b381b3SDavid Woodhouse if (faila != -1) { 191953b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, faila, i); 192053b381b3SDavid Woodhouse SetPageUptodate(page); 192153b381b3SDavid Woodhouse } 192253b381b3SDavid Woodhouse if (failb != -1) { 192353b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, failb, i); 192453b381b3SDavid Woodhouse SetPageUptodate(page); 192553b381b3SDavid Woodhouse } 192653b381b3SDavid Woodhouse } 192753b381b3SDavid Woodhouse } 19282c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 192953b381b3SDavid Woodhouse /* 193053b381b3SDavid Woodhouse * if we're rebuilding a read, we have to use 193153b381b3SDavid Woodhouse * pages from the bio list 193253b381b3SDavid Woodhouse */ 1933b4ee1782SOmar Sandoval if ((rbio->operation == BTRFS_RBIO_READ_REBUILD || 1934b4ee1782SOmar Sandoval rbio->operation == BTRFS_RBIO_REBUILD_MISSING) && 193553b381b3SDavid Woodhouse (stripe == faila || stripe == failb)) { 193653b381b3SDavid Woodhouse page = page_in_rbio(rbio, stripe, pagenr, 0); 193753b381b3SDavid Woodhouse } else { 193853b381b3SDavid Woodhouse page = rbio_stripe_page(rbio, stripe, pagenr); 193953b381b3SDavid Woodhouse } 194053b381b3SDavid Woodhouse kunmap(page); 194153b381b3SDavid Woodhouse } 194253b381b3SDavid Woodhouse } 194353b381b3SDavid Woodhouse 194458efbc9fSOmar Sandoval err = BLK_STS_OK; 194553b381b3SDavid Woodhouse cleanup: 194653b381b3SDavid Woodhouse kfree(pointers); 194753b381b3SDavid Woodhouse 194853b381b3SDavid Woodhouse cleanup_io: 1949580c6efaSLiu Bo /* 1950580c6efaSLiu Bo * Similar to READ_REBUILD, REBUILD_MISSING at this point also has a 1951580c6efaSLiu Bo * valid rbio which is consistent with ondisk content, thus such a 1952580c6efaSLiu Bo * valid rbio can be cached to avoid further disk reads. 1953580c6efaSLiu Bo */ 1954580c6efaSLiu Bo if (rbio->operation == BTRFS_RBIO_READ_REBUILD || 1955580c6efaSLiu Bo rbio->operation == BTRFS_RBIO_REBUILD_MISSING) { 195644ac474dSLiu Bo /* 195744ac474dSLiu Bo * - In case of two failures, where rbio->failb != -1: 195844ac474dSLiu Bo * 195944ac474dSLiu Bo * Do not cache this rbio since the above read reconstruction 196044ac474dSLiu Bo * (raid6_datap_recov() or raid6_2data_recov()) may have 196144ac474dSLiu Bo * changed some content of stripes which are not identical to 196244ac474dSLiu Bo * on-disk content any more, otherwise, a later write/recover 196344ac474dSLiu Bo * may steal stripe_pages from this rbio and end up with 196444ac474dSLiu Bo * corruptions or rebuild failures. 196544ac474dSLiu Bo * 196644ac474dSLiu Bo * - In case of single failure, where rbio->failb == -1: 196744ac474dSLiu Bo * 196844ac474dSLiu Bo * Cache this rbio iff the above read reconstruction is 196952042d8eSAndrea Gelmini * executed without problems. 
197044ac474dSLiu Bo */ 197144ac474dSLiu Bo if (err == BLK_STS_OK && rbio->failb < 0) 19724ae10b3aSChris Mason cache_rbio_pages(rbio); 19734ae10b3aSChris Mason else 19744ae10b3aSChris Mason clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 19754ae10b3aSChris Mason 19764246a0b6SChristoph Hellwig rbio_orig_end_io(rbio, err); 197758efbc9fSOmar Sandoval } else if (err == BLK_STS_OK) { 197853b381b3SDavid Woodhouse rbio->faila = -1; 197953b381b3SDavid Woodhouse rbio->failb = -1; 19805a6ac9eaSMiao Xie 19815a6ac9eaSMiao Xie if (rbio->operation == BTRFS_RBIO_WRITE) 198253b381b3SDavid Woodhouse finish_rmw(rbio); 19835a6ac9eaSMiao Xie else if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB) 19845a6ac9eaSMiao Xie finish_parity_scrub(rbio, 0); 19855a6ac9eaSMiao Xie else 19865a6ac9eaSMiao Xie BUG(); 198753b381b3SDavid Woodhouse } else { 19884246a0b6SChristoph Hellwig rbio_orig_end_io(rbio, err); 198953b381b3SDavid Woodhouse } 199053b381b3SDavid Woodhouse } 199153b381b3SDavid Woodhouse 199253b381b3SDavid Woodhouse /* 199353b381b3SDavid Woodhouse * This is called only for stripes we've read from disk to 199453b381b3SDavid Woodhouse * reconstruct the parity. 199553b381b3SDavid Woodhouse */ 19964246a0b6SChristoph Hellwig static void raid_recover_end_io(struct bio *bio) 199753b381b3SDavid Woodhouse { 199853b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio = bio->bi_private; 199953b381b3SDavid Woodhouse 200053b381b3SDavid Woodhouse /* 200153b381b3SDavid Woodhouse * we only read stripe pages off the disk, set them 200253b381b3SDavid Woodhouse * up to date if there were no errors 200353b381b3SDavid Woodhouse */ 20044e4cbee9SChristoph Hellwig if (bio->bi_status) 200553b381b3SDavid Woodhouse fail_bio_stripe(rbio, bio); 200653b381b3SDavid Woodhouse else 200753b381b3SDavid Woodhouse set_bio_pages_uptodate(bio); 200853b381b3SDavid Woodhouse bio_put(bio); 200953b381b3SDavid Woodhouse 2010b89e1b01SMiao Xie if (!atomic_dec_and_test(&rbio->stripes_pending)) 201153b381b3SDavid Woodhouse return; 201253b381b3SDavid Woodhouse 2013b89e1b01SMiao Xie if (atomic_read(&rbio->error) > rbio->bbio->max_errors) 201458efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 201553b381b3SDavid Woodhouse else 201653b381b3SDavid Woodhouse __raid_recover_end_io(rbio); 201753b381b3SDavid Woodhouse } 201853b381b3SDavid Woodhouse 201953b381b3SDavid Woodhouse /* 202053b381b3SDavid Woodhouse * reads everything we need off the disk to reconstruct 202153b381b3SDavid Woodhouse * the parity. endio handlers trigger final reconstruction 202253b381b3SDavid Woodhouse * when the IO is done. 202353b381b3SDavid Woodhouse * 202453b381b3SDavid Woodhouse * This is used both for reads from the higher layers and for 202553b381b3SDavid Woodhouse * parity construction required to finish a rmw cycle. 
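 *
 * The stripes recorded in faila/failb are not read at all, they are just
 * counted as errors up front, and pages that are already uptodate (for
 * example from the stripe cache) are skipped.  If that leaves nothing to
 * read, the reconstruction runs immediately instead of waiting for an
 * endio handler.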
202653b381b3SDavid Woodhouse */ 202753b381b3SDavid Woodhouse static int __raid56_parity_recover(struct btrfs_raid_bio *rbio) 202853b381b3SDavid Woodhouse { 202953b381b3SDavid Woodhouse int bios_to_read = 0; 203053b381b3SDavid Woodhouse struct bio_list bio_list; 203153b381b3SDavid Woodhouse int ret; 203253b381b3SDavid Woodhouse int pagenr; 203353b381b3SDavid Woodhouse int stripe; 203453b381b3SDavid Woodhouse struct bio *bio; 203553b381b3SDavid Woodhouse 203653b381b3SDavid Woodhouse bio_list_init(&bio_list); 203753b381b3SDavid Woodhouse 203853b381b3SDavid Woodhouse ret = alloc_rbio_pages(rbio); 203953b381b3SDavid Woodhouse if (ret) 204053b381b3SDavid Woodhouse goto cleanup; 204153b381b3SDavid Woodhouse 2042b89e1b01SMiao Xie atomic_set(&rbio->error, 0); 204353b381b3SDavid Woodhouse 204453b381b3SDavid Woodhouse /* 20454ae10b3aSChris Mason * read everything that hasn't failed. Thanks to the 20464ae10b3aSChris Mason * stripe cache, it is possible that some or all of these 20474ae10b3aSChris Mason * pages are going to be uptodate. 204853b381b3SDavid Woodhouse */ 20492c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 20505588383eSLiu Bo if (rbio->faila == stripe || rbio->failb == stripe) { 2051b89e1b01SMiao Xie atomic_inc(&rbio->error); 205253b381b3SDavid Woodhouse continue; 20535588383eSLiu Bo } 205453b381b3SDavid Woodhouse 2055915e2290SZhao Lei for (pagenr = 0; pagenr < rbio->stripe_npages; pagenr++) { 205653b381b3SDavid Woodhouse struct page *p; 205753b381b3SDavid Woodhouse 205853b381b3SDavid Woodhouse /* 205953b381b3SDavid Woodhouse * the rmw code may have already read this 206053b381b3SDavid Woodhouse * page in 206153b381b3SDavid Woodhouse */ 206253b381b3SDavid Woodhouse p = rbio_stripe_page(rbio, stripe, pagenr); 206353b381b3SDavid Woodhouse if (PageUptodate(p)) 206453b381b3SDavid Woodhouse continue; 206553b381b3SDavid Woodhouse 206653b381b3SDavid Woodhouse ret = rbio_add_io_page(rbio, &bio_list, 206753b381b3SDavid Woodhouse rbio_stripe_page(rbio, stripe, pagenr), 206853b381b3SDavid Woodhouse stripe, pagenr, rbio->stripe_len); 206953b381b3SDavid Woodhouse if (ret < 0) 207053b381b3SDavid Woodhouse goto cleanup; 207153b381b3SDavid Woodhouse } 207253b381b3SDavid Woodhouse } 207353b381b3SDavid Woodhouse 207453b381b3SDavid Woodhouse bios_to_read = bio_list_size(&bio_list); 207553b381b3SDavid Woodhouse if (!bios_to_read) { 207653b381b3SDavid Woodhouse /* 207753b381b3SDavid Woodhouse * we might have no bios to read just because the pages 207853b381b3SDavid Woodhouse * were up to date, or we might have no bios to read because 207953b381b3SDavid Woodhouse * the devices were gone. 208053b381b3SDavid Woodhouse */ 2081b89e1b01SMiao Xie if (atomic_read(&rbio->error) <= rbio->bbio->max_errors) { 208253b381b3SDavid Woodhouse __raid_recover_end_io(rbio); 2083813f8a0eSNikolay Borisov return 0; 208453b381b3SDavid Woodhouse } else { 208553b381b3SDavid Woodhouse goto cleanup; 208653b381b3SDavid Woodhouse } 208753b381b3SDavid Woodhouse } 208853b381b3SDavid Woodhouse 208953b381b3SDavid Woodhouse /* 209053b381b3SDavid Woodhouse * the bbio may be freed once we submit the last bio. 
Make sure 209153b381b3SDavid Woodhouse * not to touch it after that 209253b381b3SDavid Woodhouse */ 2093b89e1b01SMiao Xie atomic_set(&rbio->stripes_pending, bios_to_read); 2094bf28a605SNikolay Borisov while ((bio = bio_list_pop(&bio_list))) { 209553b381b3SDavid Woodhouse bio->bi_private = rbio; 209653b381b3SDavid Woodhouse bio->bi_end_io = raid_recover_end_io; 2097ebcc3263SDavid Sterba bio->bi_opf = REQ_OP_READ; 209853b381b3SDavid Woodhouse 20990b246afaSJeff Mahoney btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 210053b381b3SDavid Woodhouse 21014e49ea4aSMike Christie submit_bio(bio); 210253b381b3SDavid Woodhouse } 2103813f8a0eSNikolay Borisov 210453b381b3SDavid Woodhouse return 0; 210553b381b3SDavid Woodhouse 210653b381b3SDavid Woodhouse cleanup: 2107b4ee1782SOmar Sandoval if (rbio->operation == BTRFS_RBIO_READ_REBUILD || 2108b4ee1782SOmar Sandoval rbio->operation == BTRFS_RBIO_REBUILD_MISSING) 210958efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 2110785884fcSLiu Bo 2111785884fcSLiu Bo while ((bio = bio_list_pop(&bio_list))) 2112785884fcSLiu Bo bio_put(bio); 2113785884fcSLiu Bo 211453b381b3SDavid Woodhouse return -EIO; 211553b381b3SDavid Woodhouse } 211653b381b3SDavid Woodhouse 211753b381b3SDavid Woodhouse /* 211853b381b3SDavid Woodhouse * the main entry point for reads from the higher layers. This 211953b381b3SDavid Woodhouse * is really only called when the normal read path had a failure, 212053b381b3SDavid Woodhouse * so we assume the bio they send down corresponds to a failed part 212153b381b3SDavid Woodhouse * of the drive. 212253b381b3SDavid Woodhouse */ 21232ff7e61eSJeff Mahoney int raid56_parity_recover(struct btrfs_fs_info *fs_info, struct bio *bio, 21248e5cfb55SZhao Lei struct btrfs_bio *bbio, u64 stripe_len, 21258e5cfb55SZhao Lei int mirror_num, int generic_io) 212653b381b3SDavid Woodhouse { 212753b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio; 212853b381b3SDavid Woodhouse int ret; 212953b381b3SDavid Woodhouse 2130abad60c6SLiu Bo if (generic_io) { 2131abad60c6SLiu Bo ASSERT(bbio->mirror_num == mirror_num); 2132abad60c6SLiu Bo btrfs_io_bio(bio)->mirror_num = mirror_num; 2133abad60c6SLiu Bo } 2134abad60c6SLiu Bo 21352ff7e61eSJeff Mahoney rbio = alloc_rbio(fs_info, bbio, stripe_len); 2136af8e2d1dSMiao Xie if (IS_ERR(rbio)) { 21376e9606d2SZhao Lei if (generic_io) 21386e9606d2SZhao Lei btrfs_put_bbio(bbio); 213953b381b3SDavid Woodhouse return PTR_ERR(rbio); 2140af8e2d1dSMiao Xie } 214153b381b3SDavid Woodhouse 21421b94b556SMiao Xie rbio->operation = BTRFS_RBIO_READ_REBUILD; 214353b381b3SDavid Woodhouse bio_list_add(&rbio->bio_list, bio); 21444f024f37SKent Overstreet rbio->bio_list_bytes = bio->bi_iter.bi_size; 214553b381b3SDavid Woodhouse 214653b381b3SDavid Woodhouse rbio->faila = find_logical_bio_stripe(rbio, bio); 214753b381b3SDavid Woodhouse if (rbio->faila == -1) { 21480b246afaSJeff Mahoney btrfs_warn(fs_info, 2149e46a28caSLiu Bo "%s could not find the bad stripe in raid56 so that we cannot recover any more (bio has logical %llu len %llu, bbio has map_type %llu)", 21501201b58bSDavid Sterba __func__, bio->bi_iter.bi_sector << 9, 2151e46a28caSLiu Bo (u64)bio->bi_iter.bi_size, bbio->map_type); 21526e9606d2SZhao Lei if (generic_io) 21536e9606d2SZhao Lei btrfs_put_bbio(bbio); 215453b381b3SDavid Woodhouse kfree(rbio); 215553b381b3SDavid Woodhouse return -EIO; 215653b381b3SDavid Woodhouse } 215753b381b3SDavid Woodhouse 21584245215dSMiao Xie if (generic_io) { 21590b246afaSJeff Mahoney btrfs_bio_counter_inc_noblocked(fs_info); 21604245215dSMiao Xie 
rbio->generic_bio_cnt = 1; 21614245215dSMiao Xie } else { 21626e9606d2SZhao Lei btrfs_get_bbio(bbio); 21634245215dSMiao Xie } 21644245215dSMiao Xie 216553b381b3SDavid Woodhouse /* 21668810f751SLiu Bo * Loop retry: 21678810f751SLiu Bo * for 'mirror == 2', reconstruct from all other stripes. 21688810f751SLiu Bo * for 'mirror_num > 2', select a stripe to fail on every retry. 216953b381b3SDavid Woodhouse */ 21708810f751SLiu Bo if (mirror_num > 2) { 21718810f751SLiu Bo /* 21728810f751SLiu Bo * 'mirror == 3' is to fail the p stripe and 21738810f751SLiu Bo * reconstruct from the q stripe. 'mirror > 3' is to 21748810f751SLiu Bo * fail a data stripe and reconstruct from p+q stripe. 21758810f751SLiu Bo */ 21768810f751SLiu Bo rbio->failb = rbio->real_stripes - (mirror_num - 1); 21778810f751SLiu Bo ASSERT(rbio->failb > 0); 21788810f751SLiu Bo if (rbio->failb <= rbio->faila) 21798810f751SLiu Bo rbio->failb--; 21808810f751SLiu Bo } 218153b381b3SDavid Woodhouse 218253b381b3SDavid Woodhouse ret = lock_stripe_add(rbio); 218353b381b3SDavid Woodhouse 218453b381b3SDavid Woodhouse /* 218553b381b3SDavid Woodhouse * __raid56_parity_recover will end the bio with 218653b381b3SDavid Woodhouse * any errors it hits. We don't want to return 218753b381b3SDavid Woodhouse * its error value up the stack because our caller 218853b381b3SDavid Woodhouse * will end up calling bio_endio with any nonzero 218953b381b3SDavid Woodhouse * return 219053b381b3SDavid Woodhouse */ 219153b381b3SDavid Woodhouse if (ret == 0) 219253b381b3SDavid Woodhouse __raid56_parity_recover(rbio); 219353b381b3SDavid Woodhouse /* 219453b381b3SDavid Woodhouse * our rbio has been added to the list of 219553b381b3SDavid Woodhouse * rbios that will be handled after the 219653b381b3SDavid Woodhouse * current lock owner is done 219753b381b3SDavid Woodhouse */ 219853b381b3SDavid Woodhouse return 0; 219953b381b3SDavid Woodhouse 220053b381b3SDavid Woodhouse } 220153b381b3SDavid Woodhouse 220253b381b3SDavid Woodhouse static void rmw_work(struct btrfs_work *work) 220353b381b3SDavid Woodhouse { 220453b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio; 220553b381b3SDavid Woodhouse 220653b381b3SDavid Woodhouse rbio = container_of(work, struct btrfs_raid_bio, work); 220753b381b3SDavid Woodhouse raid56_rmw_stripe(rbio); 220853b381b3SDavid Woodhouse } 220953b381b3SDavid Woodhouse 221053b381b3SDavid Woodhouse static void read_rebuild_work(struct btrfs_work *work) 221153b381b3SDavid Woodhouse { 221253b381b3SDavid Woodhouse struct btrfs_raid_bio *rbio; 221353b381b3SDavid Woodhouse 221453b381b3SDavid Woodhouse rbio = container_of(work, struct btrfs_raid_bio, work); 221553b381b3SDavid Woodhouse __raid56_parity_recover(rbio); 221653b381b3SDavid Woodhouse } 22175a6ac9eaSMiao Xie 22185a6ac9eaSMiao Xie /* 22195a6ac9eaSMiao Xie * The following code is used to scrub/replace the parity stripe 22205a6ac9eaSMiao Xie * 2221ae6529c3SQu Wenruo * Caller must have already increased bio_counter for getting @bbio. 2222ae6529c3SQu Wenruo * 22235a6ac9eaSMiao Xie * Note: We need to make sure that all the pages added into the scrub/replace 22245a6ac9eaSMiao Xie * raid bio are correct and are not changed during the scrub/replace. That 22255a6ac9eaSMiao Xie * is, those pages only hold metadata or file data with checksums.
22265a6ac9eaSMiao Xie */ 22275a6ac9eaSMiao Xie 22285a6ac9eaSMiao Xie struct btrfs_raid_bio * 22292ff7e61eSJeff Mahoney raid56_parity_alloc_scrub_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, 22308e5cfb55SZhao Lei struct btrfs_bio *bbio, u64 stripe_len, 22318e5cfb55SZhao Lei struct btrfs_device *scrub_dev, 22325a6ac9eaSMiao Xie unsigned long *dbitmap, int stripe_nsectors) 22335a6ac9eaSMiao Xie { 22345a6ac9eaSMiao Xie struct btrfs_raid_bio *rbio; 22355a6ac9eaSMiao Xie int i; 22365a6ac9eaSMiao Xie 22372ff7e61eSJeff Mahoney rbio = alloc_rbio(fs_info, bbio, stripe_len); 22385a6ac9eaSMiao Xie if (IS_ERR(rbio)) 22395a6ac9eaSMiao Xie return NULL; 22405a6ac9eaSMiao Xie bio_list_add(&rbio->bio_list, bio); 22415a6ac9eaSMiao Xie /* 22425a6ac9eaSMiao Xie * This is a special bio which is used to hold the completion handler 22435a6ac9eaSMiao Xie * and make the scrub rbio is similar to the other types 22445a6ac9eaSMiao Xie */ 22455a6ac9eaSMiao Xie ASSERT(!bio->bi_iter.bi_size); 22465a6ac9eaSMiao Xie rbio->operation = BTRFS_RBIO_PARITY_SCRUB; 22475a6ac9eaSMiao Xie 22489cd3a7ebSLiu Bo /* 22499cd3a7ebSLiu Bo * After mapping bbio with BTRFS_MAP_WRITE, parities have been sorted 22509cd3a7ebSLiu Bo * to the end position, so this search can start from the first parity 22519cd3a7ebSLiu Bo * stripe. 22529cd3a7ebSLiu Bo */ 22539cd3a7ebSLiu Bo for (i = rbio->nr_data; i < rbio->real_stripes; i++) { 22545a6ac9eaSMiao Xie if (bbio->stripes[i].dev == scrub_dev) { 22555a6ac9eaSMiao Xie rbio->scrubp = i; 22565a6ac9eaSMiao Xie break; 22575a6ac9eaSMiao Xie } 22585a6ac9eaSMiao Xie } 22599cd3a7ebSLiu Bo ASSERT(i < rbio->real_stripes); 22605a6ac9eaSMiao Xie 22615a6ac9eaSMiao Xie /* Now we just support the sectorsize equals to page size */ 22620b246afaSJeff Mahoney ASSERT(fs_info->sectorsize == PAGE_SIZE); 22635a6ac9eaSMiao Xie ASSERT(rbio->stripe_npages == stripe_nsectors); 22645a6ac9eaSMiao Xie bitmap_copy(rbio->dbitmap, dbitmap, stripe_nsectors); 22655a6ac9eaSMiao Xie 2266ae6529c3SQu Wenruo /* 2267ae6529c3SQu Wenruo * We have already increased bio_counter when getting bbio, record it 2268ae6529c3SQu Wenruo * so we can free it at rbio_orig_end_io(). 2269ae6529c3SQu Wenruo */ 2270ae6529c3SQu Wenruo rbio->generic_bio_cnt = 1; 2271ae6529c3SQu Wenruo 22725a6ac9eaSMiao Xie return rbio; 22735a6ac9eaSMiao Xie } 22745a6ac9eaSMiao Xie 2275b4ee1782SOmar Sandoval /* Used for both parity scrub and missing. */ 2276b4ee1782SOmar Sandoval void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page, 2277b4ee1782SOmar Sandoval u64 logical) 22785a6ac9eaSMiao Xie { 22795a6ac9eaSMiao Xie int stripe_offset; 22805a6ac9eaSMiao Xie int index; 22815a6ac9eaSMiao Xie 22828e5cfb55SZhao Lei ASSERT(logical >= rbio->bbio->raid_map[0]); 22838e5cfb55SZhao Lei ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] + 22845a6ac9eaSMiao Xie rbio->stripe_len * rbio->nr_data); 22858e5cfb55SZhao Lei stripe_offset = (int)(logical - rbio->bbio->raid_map[0]); 228609cbfeafSKirill A. Shutemov index = stripe_offset >> PAGE_SHIFT; 22875a6ac9eaSMiao Xie rbio->bio_pages[index] = page; 22885a6ac9eaSMiao Xie } 22895a6ac9eaSMiao Xie 22905a6ac9eaSMiao Xie /* 22915a6ac9eaSMiao Xie * We just scrub the parity that we have correct data on the same horizontal, 22925a6ac9eaSMiao Xie * so we needn't allocate all pages for all the stripes. 
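 *
 * Only the horizontal stripes whose bits are set in rbio->dbitmap get a
 * page here; the page for stripe i at bit position 'bit' is stored at
 * index i * stripe_npages + bit of rbio->stripe_pages.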
22935a6ac9eaSMiao Xie */ 22945a6ac9eaSMiao Xie static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio) 22955a6ac9eaSMiao Xie { 22965a6ac9eaSMiao Xie int i; 22975a6ac9eaSMiao Xie int bit; 22985a6ac9eaSMiao Xie int index; 22995a6ac9eaSMiao Xie struct page *page; 23005a6ac9eaSMiao Xie 23015a6ac9eaSMiao Xie for_each_set_bit(bit, rbio->dbitmap, rbio->stripe_npages) { 23022c8cdd6eSMiao Xie for (i = 0; i < rbio->real_stripes; i++) { 23035a6ac9eaSMiao Xie index = i * rbio->stripe_npages + bit; 23045a6ac9eaSMiao Xie if (rbio->stripe_pages[index]) 23055a6ac9eaSMiao Xie continue; 23065a6ac9eaSMiao Xie 23075a6ac9eaSMiao Xie page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 23085a6ac9eaSMiao Xie if (!page) 23095a6ac9eaSMiao Xie return -ENOMEM; 23105a6ac9eaSMiao Xie rbio->stripe_pages[index] = page; 23115a6ac9eaSMiao Xie } 23125a6ac9eaSMiao Xie } 23135a6ac9eaSMiao Xie return 0; 23145a6ac9eaSMiao Xie } 23155a6ac9eaSMiao Xie 23165a6ac9eaSMiao Xie static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio, 23175a6ac9eaSMiao Xie int need_check) 23185a6ac9eaSMiao Xie { 231976035976SMiao Xie struct btrfs_bio *bbio = rbio->bbio; 23201389053eSKees Cook void **pointers = rbio->finish_pointers; 23211389053eSKees Cook unsigned long *pbitmap = rbio->finish_pbitmap; 23225a6ac9eaSMiao Xie int nr_data = rbio->nr_data; 23235a6ac9eaSMiao Xie int stripe; 23245a6ac9eaSMiao Xie int pagenr; 2325c17af965SDavid Sterba bool has_qstripe; 23265a6ac9eaSMiao Xie struct page *p_page = NULL; 23275a6ac9eaSMiao Xie struct page *q_page = NULL; 23285a6ac9eaSMiao Xie struct bio_list bio_list; 23295a6ac9eaSMiao Xie struct bio *bio; 233076035976SMiao Xie int is_replace = 0; 23315a6ac9eaSMiao Xie int ret; 23325a6ac9eaSMiao Xie 23335a6ac9eaSMiao Xie bio_list_init(&bio_list); 23345a6ac9eaSMiao Xie 2335c17af965SDavid Sterba if (rbio->real_stripes - rbio->nr_data == 1) 2336c17af965SDavid Sterba has_qstripe = false; 2337c17af965SDavid Sterba else if (rbio->real_stripes - rbio->nr_data == 2) 2338c17af965SDavid Sterba has_qstripe = true; 2339c17af965SDavid Sterba else 23405a6ac9eaSMiao Xie BUG(); 23415a6ac9eaSMiao Xie 234276035976SMiao Xie if (bbio->num_tgtdevs && bbio->tgtdev_map[rbio->scrubp]) { 234376035976SMiao Xie is_replace = 1; 234476035976SMiao Xie bitmap_copy(pbitmap, rbio->dbitmap, rbio->stripe_npages); 234576035976SMiao Xie } 234676035976SMiao Xie 23475a6ac9eaSMiao Xie /* 23485a6ac9eaSMiao Xie * Because the higher layers(scrubber) are unlikely to 23495a6ac9eaSMiao Xie * use this area of the disk again soon, so don't cache 23505a6ac9eaSMiao Xie * it. 
23515a6ac9eaSMiao Xie */ 23525a6ac9eaSMiao Xie clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags); 23535a6ac9eaSMiao Xie 23545a6ac9eaSMiao Xie if (!need_check) 23555a6ac9eaSMiao Xie goto writeback; 23565a6ac9eaSMiao Xie 23575a6ac9eaSMiao Xie p_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 23585a6ac9eaSMiao Xie if (!p_page) 23595a6ac9eaSMiao Xie goto cleanup; 23605a6ac9eaSMiao Xie SetPageUptodate(p_page); 23615a6ac9eaSMiao Xie 2362c17af965SDavid Sterba if (has_qstripe) { 23635a6ac9eaSMiao Xie q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 23645a6ac9eaSMiao Xie if (!q_page) { 23655a6ac9eaSMiao Xie __free_page(p_page); 23665a6ac9eaSMiao Xie goto cleanup; 23675a6ac9eaSMiao Xie } 23685a6ac9eaSMiao Xie SetPageUptodate(q_page); 23695a6ac9eaSMiao Xie } 23705a6ac9eaSMiao Xie 23715a6ac9eaSMiao Xie atomic_set(&rbio->error, 0); 23725a6ac9eaSMiao Xie 23735a6ac9eaSMiao Xie for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { 23745a6ac9eaSMiao Xie struct page *p; 23755a6ac9eaSMiao Xie void *parity; 23765a6ac9eaSMiao Xie /* first collect one page from each data stripe */ 23775a6ac9eaSMiao Xie for (stripe = 0; stripe < nr_data; stripe++) { 23785a6ac9eaSMiao Xie p = page_in_rbio(rbio, stripe, pagenr, 0); 23795a6ac9eaSMiao Xie pointers[stripe] = kmap(p); 23805a6ac9eaSMiao Xie } 23815a6ac9eaSMiao Xie 23825a6ac9eaSMiao Xie /* then add the parity stripe */ 23835a6ac9eaSMiao Xie pointers[stripe++] = kmap(p_page); 23845a6ac9eaSMiao Xie 2385c17af965SDavid Sterba if (has_qstripe) { 23865a6ac9eaSMiao Xie /* 23875a6ac9eaSMiao Xie * raid6, add the qstripe and call the 23885a6ac9eaSMiao Xie * library function to fill in our p/q 23895a6ac9eaSMiao Xie */ 23905a6ac9eaSMiao Xie pointers[stripe++] = kmap(q_page); 23915a6ac9eaSMiao Xie 23922c8cdd6eSMiao Xie raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE, 23935a6ac9eaSMiao Xie pointers); 23945a6ac9eaSMiao Xie } else { 23955a6ac9eaSMiao Xie /* raid5 */ 239669d24804SDavid Sterba copy_page(pointers[nr_data], pointers[0]); 239709cbfeafSKirill A. Shutemov run_xor(pointers + 1, nr_data - 1, PAGE_SIZE); 23985a6ac9eaSMiao Xie } 23995a6ac9eaSMiao Xie 240001327610SNicholas D Steeves /* Check scrubbing parity and repair it */ 24015a6ac9eaSMiao Xie p = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 24025a6ac9eaSMiao Xie parity = kmap(p); 240309cbfeafSKirill A. Shutemov if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE)) 240469d24804SDavid Sterba copy_page(parity, pointers[rbio->scrubp]); 24055a6ac9eaSMiao Xie else 24065a6ac9eaSMiao Xie /* Parity is right, needn't writeback */ 24075a6ac9eaSMiao Xie bitmap_clear(rbio->dbitmap, pagenr, 1); 24085a6ac9eaSMiao Xie kunmap(p); 24095a6ac9eaSMiao Xie 24103897b6f0SAndrea Righi for (stripe = 0; stripe < nr_data; stripe++) 24115a6ac9eaSMiao Xie kunmap(page_in_rbio(rbio, stripe, pagenr, 0)); 24123897b6f0SAndrea Righi kunmap(p_page); 24135a6ac9eaSMiao Xie } 24145a6ac9eaSMiao Xie 24155a6ac9eaSMiao Xie __free_page(p_page); 24165a6ac9eaSMiao Xie if (q_page) 24175a6ac9eaSMiao Xie __free_page(q_page); 24185a6ac9eaSMiao Xie 24195a6ac9eaSMiao Xie writeback: 24205a6ac9eaSMiao Xie /* 24215a6ac9eaSMiao Xie * time to start writing. Make bios for everything from the 24225a6ac9eaSMiao Xie * higher layers (the bio_list in our rbio) and our p/q. Ignore 24235a6ac9eaSMiao Xie * everything else. 
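 *
 * Concretely, the loops below only queue pages of the stripe being
 * scrubbed (rbio->scrubp) whose dbitmap bit is still set, i.e. the bits
 * that were not cleared above because the on-disk parity already matched.
 * When a device replace is running, the same scrubp pages are also queued
 * once more for the replace target through bbio->tgtdev_map[rbio->scrubp],
 * walking pbitmap, the copy of dbitmap taken before the check.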
24245a6ac9eaSMiao Xie */ 24255a6ac9eaSMiao Xie for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { 24265a6ac9eaSMiao Xie struct page *page; 24275a6ac9eaSMiao Xie 24285a6ac9eaSMiao Xie page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 24295a6ac9eaSMiao Xie ret = rbio_add_io_page(rbio, &bio_list, 24305a6ac9eaSMiao Xie page, rbio->scrubp, pagenr, rbio->stripe_len); 24315a6ac9eaSMiao Xie if (ret) 24325a6ac9eaSMiao Xie goto cleanup; 24335a6ac9eaSMiao Xie } 24345a6ac9eaSMiao Xie 243576035976SMiao Xie if (!is_replace) 243676035976SMiao Xie goto submit_write; 243776035976SMiao Xie 243876035976SMiao Xie for_each_set_bit(pagenr, pbitmap, rbio->stripe_npages) { 243976035976SMiao Xie struct page *page; 244076035976SMiao Xie 244176035976SMiao Xie page = rbio_stripe_page(rbio, rbio->scrubp, pagenr); 244276035976SMiao Xie ret = rbio_add_io_page(rbio, &bio_list, page, 244376035976SMiao Xie bbio->tgtdev_map[rbio->scrubp], 244476035976SMiao Xie pagenr, rbio->stripe_len); 244576035976SMiao Xie if (ret) 244676035976SMiao Xie goto cleanup; 244776035976SMiao Xie } 244876035976SMiao Xie 244976035976SMiao Xie submit_write: 24505a6ac9eaSMiao Xie nr_data = bio_list_size(&bio_list); 24515a6ac9eaSMiao Xie if (!nr_data) { 24525a6ac9eaSMiao Xie /* Every parity is right */ 245358efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_OK); 24545a6ac9eaSMiao Xie return; 24555a6ac9eaSMiao Xie } 24565a6ac9eaSMiao Xie 24575a6ac9eaSMiao Xie atomic_set(&rbio->stripes_pending, nr_data); 24585a6ac9eaSMiao Xie 2459bf28a605SNikolay Borisov while ((bio = bio_list_pop(&bio_list))) { 24605a6ac9eaSMiao Xie bio->bi_private = rbio; 2461a6111d11SZhao Lei bio->bi_end_io = raid_write_end_io; 2462ebcc3263SDavid Sterba bio->bi_opf = REQ_OP_WRITE; 24634e49ea4aSMike Christie 24644e49ea4aSMike Christie submit_bio(bio); 24655a6ac9eaSMiao Xie } 24665a6ac9eaSMiao Xie return; 24675a6ac9eaSMiao Xie 24685a6ac9eaSMiao Xie cleanup: 246958efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 2470785884fcSLiu Bo 2471785884fcSLiu Bo while ((bio = bio_list_pop(&bio_list))) 2472785884fcSLiu Bo bio_put(bio); 24735a6ac9eaSMiao Xie } 24745a6ac9eaSMiao Xie 24755a6ac9eaSMiao Xie static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe) 24765a6ac9eaSMiao Xie { 24775a6ac9eaSMiao Xie if (stripe >= 0 && stripe < rbio->nr_data) 24785a6ac9eaSMiao Xie return 1; 24795a6ac9eaSMiao Xie return 0; 24805a6ac9eaSMiao Xie } 24815a6ac9eaSMiao Xie 24825a6ac9eaSMiao Xie /* 24835a6ac9eaSMiao Xie * While we're doing the parity check and repair, we could have errors 24845a6ac9eaSMiao Xie * in reading pages off the disk. This checks for errors and if we're 24855a6ac9eaSMiao Xie * not able to read the page it'll trigger parity reconstruction. 
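 *
 * The checks below boil down to this (an illustrative summary, where
 * dfail counts failed data stripes and failp is a failed parity stripe,
 * if any):
 *
 *	no stripe failed at all		full check, finish_parity_scrub(rbio, 1)
 *	dfail > max_errors - 1		give up, too much is missing once the
 *					parity under scrub is discounted
 *	dfail == 0			only parity failed, rewrite it via
 *					finish_parity_scrub(rbio, 0)
 *	failp == rbio->scrubp		the surviving parity can be trusted,
 *					rebuild through __raid_recover_end_io()
 *	anything else			give up
 *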
The 24865a6ac9eaSMiao Xie * parity scrub will be finished after we've reconstructed the failed 24875a6ac9eaSMiao Xie * stripes 24885a6ac9eaSMiao Xie */ 24895a6ac9eaSMiao Xie static void validate_rbio_for_parity_scrub(struct btrfs_raid_bio *rbio) 24905a6ac9eaSMiao Xie { 24915a6ac9eaSMiao Xie if (atomic_read(&rbio->error) > rbio->bbio->max_errors) 24925a6ac9eaSMiao Xie goto cleanup; 24935a6ac9eaSMiao Xie 24945a6ac9eaSMiao Xie if (rbio->faila >= 0 || rbio->failb >= 0) { 24955a6ac9eaSMiao Xie int dfail = 0, failp = -1; 24965a6ac9eaSMiao Xie 24975a6ac9eaSMiao Xie if (is_data_stripe(rbio, rbio->faila)) 24985a6ac9eaSMiao Xie dfail++; 24995a6ac9eaSMiao Xie else if (is_parity_stripe(rbio->faila)) 25005a6ac9eaSMiao Xie failp = rbio->faila; 25015a6ac9eaSMiao Xie 25025a6ac9eaSMiao Xie if (is_data_stripe(rbio, rbio->failb)) 25035a6ac9eaSMiao Xie dfail++; 25045a6ac9eaSMiao Xie else if (is_parity_stripe(rbio->failb)) 25055a6ac9eaSMiao Xie failp = rbio->failb; 25065a6ac9eaSMiao Xie 25075a6ac9eaSMiao Xie /* 25085a6ac9eaSMiao Xie * Because we can not use a scrubbing parity to repair 25095a6ac9eaSMiao Xie * the data, so the capability of the repair is declined. 25105a6ac9eaSMiao Xie * (In the case of RAID5, we can not repair anything) 25115a6ac9eaSMiao Xie */ 25125a6ac9eaSMiao Xie if (dfail > rbio->bbio->max_errors - 1) 25135a6ac9eaSMiao Xie goto cleanup; 25145a6ac9eaSMiao Xie 25155a6ac9eaSMiao Xie /* 25165a6ac9eaSMiao Xie * If all data is good, only parity is correctly, just 25175a6ac9eaSMiao Xie * repair the parity. 25185a6ac9eaSMiao Xie */ 25195a6ac9eaSMiao Xie if (dfail == 0) { 25205a6ac9eaSMiao Xie finish_parity_scrub(rbio, 0); 25215a6ac9eaSMiao Xie return; 25225a6ac9eaSMiao Xie } 25235a6ac9eaSMiao Xie 25245a6ac9eaSMiao Xie /* 25255a6ac9eaSMiao Xie * Here means we got one corrupted data stripe and one 25265a6ac9eaSMiao Xie * corrupted parity on RAID6, if the corrupted parity 252701327610SNicholas D Steeves * is scrubbing parity, luckily, use the other one to repair 25285a6ac9eaSMiao Xie * the data, or we can not repair the data stripe. 25295a6ac9eaSMiao Xie */ 25305a6ac9eaSMiao Xie if (failp != rbio->scrubp) 25315a6ac9eaSMiao Xie goto cleanup; 25325a6ac9eaSMiao Xie 25335a6ac9eaSMiao Xie __raid_recover_end_io(rbio); 25345a6ac9eaSMiao Xie } else { 25355a6ac9eaSMiao Xie finish_parity_scrub(rbio, 1); 25365a6ac9eaSMiao Xie } 25375a6ac9eaSMiao Xie return; 25385a6ac9eaSMiao Xie 25395a6ac9eaSMiao Xie cleanup: 254058efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 25415a6ac9eaSMiao Xie } 25425a6ac9eaSMiao Xie 25435a6ac9eaSMiao Xie /* 25445a6ac9eaSMiao Xie * end io for the read phase of the rmw cycle. All the bios here are physical 25455a6ac9eaSMiao Xie * stripe bios we've read from the disk so we can recalculate the parity of the 25465a6ac9eaSMiao Xie * stripe. 
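 *
 * (Despite the rmw-flavoured wording here, for the scrub case the
 * completion work is validate_rbio_for_parity_scrub() below, which either
 * finishes the scrub or kicks off reconstruction.)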
25475a6ac9eaSMiao Xie * 25485a6ac9eaSMiao Xie * This will usually kick off finish_rmw once all the bios are read in, but it 25495a6ac9eaSMiao Xie * may trigger parity reconstruction if we had any errors along the way 25505a6ac9eaSMiao Xie */ 25514246a0b6SChristoph Hellwig static void raid56_parity_scrub_end_io(struct bio *bio) 25525a6ac9eaSMiao Xie { 25535a6ac9eaSMiao Xie struct btrfs_raid_bio *rbio = bio->bi_private; 25545a6ac9eaSMiao Xie 25554e4cbee9SChristoph Hellwig if (bio->bi_status) 25565a6ac9eaSMiao Xie fail_bio_stripe(rbio, bio); 25575a6ac9eaSMiao Xie else 25585a6ac9eaSMiao Xie set_bio_pages_uptodate(bio); 25595a6ac9eaSMiao Xie 25605a6ac9eaSMiao Xie bio_put(bio); 25615a6ac9eaSMiao Xie 25625a6ac9eaSMiao Xie if (!atomic_dec_and_test(&rbio->stripes_pending)) 25635a6ac9eaSMiao Xie return; 25645a6ac9eaSMiao Xie 25655a6ac9eaSMiao Xie /* 25665a6ac9eaSMiao Xie * this will normally call finish_rmw to start our write 25675a6ac9eaSMiao Xie * but if there are any failed stripes we'll reconstruct 25685a6ac9eaSMiao Xie * from parity first 25695a6ac9eaSMiao Xie */ 25705a6ac9eaSMiao Xie validate_rbio_for_parity_scrub(rbio); 25715a6ac9eaSMiao Xie } 25725a6ac9eaSMiao Xie 25735a6ac9eaSMiao Xie static void raid56_parity_scrub_stripe(struct btrfs_raid_bio *rbio) 25745a6ac9eaSMiao Xie { 25755a6ac9eaSMiao Xie int bios_to_read = 0; 25765a6ac9eaSMiao Xie struct bio_list bio_list; 25775a6ac9eaSMiao Xie int ret; 25785a6ac9eaSMiao Xie int pagenr; 25795a6ac9eaSMiao Xie int stripe; 25805a6ac9eaSMiao Xie struct bio *bio; 25815a6ac9eaSMiao Xie 2582785884fcSLiu Bo bio_list_init(&bio_list); 2583785884fcSLiu Bo 25845a6ac9eaSMiao Xie ret = alloc_rbio_essential_pages(rbio); 25855a6ac9eaSMiao Xie if (ret) 25865a6ac9eaSMiao Xie goto cleanup; 25875a6ac9eaSMiao Xie 25885a6ac9eaSMiao Xie atomic_set(&rbio->error, 0); 25895a6ac9eaSMiao Xie /* 25905a6ac9eaSMiao Xie * build a list of bios to read all the missing parts of this 25915a6ac9eaSMiao Xie * stripe 25925a6ac9eaSMiao Xie */ 25932c8cdd6eSMiao Xie for (stripe = 0; stripe < rbio->real_stripes; stripe++) { 25945a6ac9eaSMiao Xie for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) { 25955a6ac9eaSMiao Xie struct page *page; 25965a6ac9eaSMiao Xie /* 25975a6ac9eaSMiao Xie * we want to find all the pages missing from 25985a6ac9eaSMiao Xie * the rbio and read them from the disk. If 25995a6ac9eaSMiao Xie * page_in_rbio finds a page in the bio list 26005a6ac9eaSMiao Xie * we don't need to read it off the stripe. 26015a6ac9eaSMiao Xie */ 26025a6ac9eaSMiao Xie page = page_in_rbio(rbio, stripe, pagenr, 1); 26035a6ac9eaSMiao Xie if (page) 26045a6ac9eaSMiao Xie continue; 26055a6ac9eaSMiao Xie 26065a6ac9eaSMiao Xie page = rbio_stripe_page(rbio, stripe, pagenr); 26075a6ac9eaSMiao Xie /* 26085a6ac9eaSMiao Xie * the bio cache may have handed us an uptodate 26095a6ac9eaSMiao Xie * page. If so, be happy and use it 26105a6ac9eaSMiao Xie */ 26115a6ac9eaSMiao Xie if (PageUptodate(page)) 26125a6ac9eaSMiao Xie continue; 26135a6ac9eaSMiao Xie 26145a6ac9eaSMiao Xie ret = rbio_add_io_page(rbio, &bio_list, page, 26155a6ac9eaSMiao Xie stripe, pagenr, rbio->stripe_len); 26165a6ac9eaSMiao Xie if (ret) 26175a6ac9eaSMiao Xie goto cleanup; 26185a6ac9eaSMiao Xie } 26195a6ac9eaSMiao Xie } 26205a6ac9eaSMiao Xie 26215a6ac9eaSMiao Xie bios_to_read = bio_list_size(&bio_list); 26225a6ac9eaSMiao Xie if (!bios_to_read) { 26235a6ac9eaSMiao Xie /* 26245a6ac9eaSMiao Xie * this can happen if others have merged with 26255a6ac9eaSMiao Xie * us, it means there is nothing left to read. 
26265a6ac9eaSMiao Xie * But if there are missing devices it may not be 26275a6ac9eaSMiao Xie * safe to do the full stripe write yet. 26285a6ac9eaSMiao Xie */ 26295a6ac9eaSMiao Xie goto finish; 26305a6ac9eaSMiao Xie } 26315a6ac9eaSMiao Xie 26325a6ac9eaSMiao Xie /* 26335a6ac9eaSMiao Xie * the bbio may be freed once we submit the last bio. Make sure 26345a6ac9eaSMiao Xie * not to touch it after that 26355a6ac9eaSMiao Xie */ 26365a6ac9eaSMiao Xie atomic_set(&rbio->stripes_pending, bios_to_read); 2637bf28a605SNikolay Borisov while ((bio = bio_list_pop(&bio_list))) { 26385a6ac9eaSMiao Xie bio->bi_private = rbio; 26395a6ac9eaSMiao Xie bio->bi_end_io = raid56_parity_scrub_end_io; 2640ebcc3263SDavid Sterba bio->bi_opf = REQ_OP_READ; 26415a6ac9eaSMiao Xie 26420b246afaSJeff Mahoney btrfs_bio_wq_end_io(rbio->fs_info, bio, BTRFS_WQ_ENDIO_RAID56); 26435a6ac9eaSMiao Xie 26444e49ea4aSMike Christie submit_bio(bio); 26455a6ac9eaSMiao Xie } 26465a6ac9eaSMiao Xie /* the actual write will happen once the reads are done */ 26475a6ac9eaSMiao Xie return; 26485a6ac9eaSMiao Xie 26495a6ac9eaSMiao Xie cleanup: 265058efbc9fSOmar Sandoval rbio_orig_end_io(rbio, BLK_STS_IOERR); 2651785884fcSLiu Bo 2652785884fcSLiu Bo while ((bio = bio_list_pop(&bio_list))) 2653785884fcSLiu Bo bio_put(bio); 2654785884fcSLiu Bo 26555a6ac9eaSMiao Xie return; 26565a6ac9eaSMiao Xie 26575a6ac9eaSMiao Xie finish: 26585a6ac9eaSMiao Xie validate_rbio_for_parity_scrub(rbio); 26595a6ac9eaSMiao Xie } 26605a6ac9eaSMiao Xie 26615a6ac9eaSMiao Xie static void scrub_parity_work(struct btrfs_work *work) 26625a6ac9eaSMiao Xie { 26635a6ac9eaSMiao Xie struct btrfs_raid_bio *rbio; 26645a6ac9eaSMiao Xie 26655a6ac9eaSMiao Xie rbio = container_of(work, struct btrfs_raid_bio, work); 26665a6ac9eaSMiao Xie raid56_parity_scrub_stripe(rbio); 26675a6ac9eaSMiao Xie } 26685a6ac9eaSMiao Xie 26695a6ac9eaSMiao Xie void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio) 26705a6ac9eaSMiao Xie { 26715a6ac9eaSMiao Xie if (!lock_stripe_add(rbio)) 2672a81b747dSDavid Sterba start_async_work(rbio, scrub_parity_work); 26735a6ac9eaSMiao Xie } 2674b4ee1782SOmar Sandoval 2675b4ee1782SOmar Sandoval /* The following code is used for dev replace of a missing RAID 5/6 device. 
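 *
 * Roughly, the caller is expected to use these helpers like this (an
 * illustrative sketch, not a verbatim copy of the actual caller):
 *
 *	rbio = raid56_alloc_missing_rbio(fs_info, bio, bbio, length);
 *	for each page that should be rebuilt:
 *		raid56_add_scrub_pages(rbio, page, logical);
 *	raid56_submit_missing_rbio(rbio);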
*/ 2676b4ee1782SOmar Sandoval 2677b4ee1782SOmar Sandoval struct btrfs_raid_bio * 26782ff7e61eSJeff Mahoney raid56_alloc_missing_rbio(struct btrfs_fs_info *fs_info, struct bio *bio, 2679b4ee1782SOmar Sandoval struct btrfs_bio *bbio, u64 length) 2680b4ee1782SOmar Sandoval { 2681b4ee1782SOmar Sandoval struct btrfs_raid_bio *rbio; 2682b4ee1782SOmar Sandoval 26832ff7e61eSJeff Mahoney rbio = alloc_rbio(fs_info, bbio, length); 2684b4ee1782SOmar Sandoval if (IS_ERR(rbio)) 2685b4ee1782SOmar Sandoval return NULL; 2686b4ee1782SOmar Sandoval 2687b4ee1782SOmar Sandoval rbio->operation = BTRFS_RBIO_REBUILD_MISSING; 2688b4ee1782SOmar Sandoval bio_list_add(&rbio->bio_list, bio); 2689b4ee1782SOmar Sandoval /* 2690b4ee1782SOmar Sandoval * This is a special bio which is used to hold the completion handler 2691b4ee1782SOmar Sandoval * and make the scrub rbio is similar to the other types 2692b4ee1782SOmar Sandoval */ 2693b4ee1782SOmar Sandoval ASSERT(!bio->bi_iter.bi_size); 2694b4ee1782SOmar Sandoval 2695b4ee1782SOmar Sandoval rbio->faila = find_logical_bio_stripe(rbio, bio); 2696b4ee1782SOmar Sandoval if (rbio->faila == -1) { 2697b4ee1782SOmar Sandoval BUG(); 2698b4ee1782SOmar Sandoval kfree(rbio); 2699b4ee1782SOmar Sandoval return NULL; 2700b4ee1782SOmar Sandoval } 2701b4ee1782SOmar Sandoval 2702ae6529c3SQu Wenruo /* 2703ae6529c3SQu Wenruo * When we get bbio, we have already increased bio_counter, record it 2704ae6529c3SQu Wenruo * so we can free it at rbio_orig_end_io() 2705ae6529c3SQu Wenruo */ 2706ae6529c3SQu Wenruo rbio->generic_bio_cnt = 1; 2707ae6529c3SQu Wenruo 2708b4ee1782SOmar Sandoval return rbio; 2709b4ee1782SOmar Sandoval } 2710b4ee1782SOmar Sandoval 2711b4ee1782SOmar Sandoval void raid56_submit_missing_rbio(struct btrfs_raid_bio *rbio) 2712b4ee1782SOmar Sandoval { 2713b4ee1782SOmar Sandoval if (!lock_stripe_add(rbio)) 2714e66d8d5aSDavid Sterba start_async_work(rbio, read_rebuild_work); 2715b4ee1782SOmar Sandoval } 2716
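
/*
 * Illustrative sketch, not part of the original file: the arithmetic behind
 * the 'mirror_num > 2' retry handling in the recovery path above.  The helper
 * name is made up for this example; real_stripes and faila mirror the
 * struct btrfs_raid_bio fields of the same name.
 *
 * With real_stripes == 6 (4 data stripes plus P and Q) and faila == 1:
 *	mirror 3 -> failb 4 (the P stripe, so we rebuild from Q)
 *	mirror 4 -> failb 3, mirror 5 -> failb 2, mirror 6 -> failb 0
 * i.e. every further retry picks one more stripe to treat as failed and
 * reconstructs around it.
 */
static int raid6_retry_pick_failb(int real_stripes, int faila, int mirror_num)
{
	int failb = real_stripes - (mirror_num - 1);

	/* step over the stripe that is already marked failed */
	if (failb <= faila)
		failb--;
	return failb;
}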