/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _RAID1_H
#define _RAID1_H

/*
 * each barrier unit size is 64MB for now
 * note: it must be larger than RESYNC_DEPTH
 */
#define BARRIER_UNIT_SECTOR_BITS	17
#define BARRIER_UNIT_SECTOR_SIZE	(1<<17)
/*
 * In struct r1conf, the following members are related to I/O barrier
 * buckets,
 *	atomic_t	*nr_pending;
 *	atomic_t	*nr_waiting;
 *	atomic_t	*nr_queued;
 *	atomic_t	*barrier;
 * Each of them points to an array of atomic_t variables. Each array is
 * designed to have BARRIER_BUCKETS_NR elements and occupy a single
 * memory page. The data width of an atomic_t variable is 4 bytes, equal
 * to 1<<(ilog2(sizeof(atomic_t))), so BARRIER_BUCKETS_NR_BITS is defined
 * as (PAGE_SHIFT - ilog2(sizeof(atomic_t))) to make sure an array of
 * atomic_t variables with BARRIER_BUCKETS_NR elements exactly occupies
 * a single memory page.
 */
#define BARRIER_BUCKETS_NR_BITS	(PAGE_SHIFT - ilog2(sizeof(atomic_t)))
#define BARRIER_BUCKETS_NR	(1<<BARRIER_BUCKETS_NR_BITS)
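/*
 * Worked example (illustrative only; assumes PAGE_SHIFT == 12 and a
 * 4-byte atomic_t, which holds on most configurations):
 *
 *	BARRIER_BUCKETS_NR_BITS = 12 - ilog2(4) = 12 - 2 = 10
 *	BARRIER_BUCKETS_NR      = 1 << 10 = 1024
 *
 * i.e. each of the four counter arrays holds 1024 atomic_t entries,
 * and 1024 * 4 bytes = 4096 bytes = exactly one page.
 */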
/* Note: raid1_info.rdev can be set to NULL asynchronously by
 * raid1_remove_disk.
 * There are three safe ways to access raid1_info.rdev.
 * 1/ when holding mddev->reconfig_mutex
 * 2/ when resync/recovery is known to be happening - i.e. in code that
 *    is called as part of performing resync/recovery.
 * 3/ while holding rcu_read_lock(), use rcu_dereference to get the
 *    pointer, and if it is non-NULL, increment rdev->nr_pending before
 *    dropping the RCU lock.
 * When .rdev is set to NULL, the nr_pending count is checked again and
 * if it has been incremented, the pointer is put back in .rdev.
 */

struct raid1_info {
	struct md_rdev	*rdev;
	sector_t	head_position;

	/* When choosing the best device for a read (read_balance())
	 * we try to keep sequential reads on the same device.
	 */
	sector_t	next_seq_sect;
	sector_t	seq_start;
};
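/*
 * Illustrative sketch of access rule 3/ above (an illustration, not
 * code copied from raid1.c; 'conf' and 'disk' are hypothetical locals):
 *
 *	struct md_rdev *rdev;
 *
 *	rcu_read_lock();
 *	rdev = rcu_dereference(conf->mirrors[disk].rdev);
 *	if (rdev && !test_bit(Faulty, &rdev->flags))
 *		atomic_inc(&rdev->nr_pending);
 *	else
 *		rdev = NULL;
 *	rcu_read_unlock();
 *
 * Once nr_pending has been raised, the rdev may safely be used after
 * rcu_read_unlock(); it is released again with rdev_dec_pending().
 */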
/*
 * memory pools need a pointer to the mddev, so they can force an unplug
 * when memory is tight, and a count of the number of drives that the
 * pool was allocated for, so they know how much to allocate and free.
 * mddev->raid_disks cannot be used, as it can change while a pool is
 * active.
 * These two values are stored in a kmalloced struct.
 * The 'raid_disks' here is twice the raid_disks in r1conf.
 * This allows space for each 'real' device to have a replacement in the
 * second half of the array.
 */

struct pool_info {
	struct mddev *mddev;
	int	raid_disks;
};

struct r1conf {
	struct mddev		*mddev;
	struct raid1_info	*mirrors;	/* twice 'raid_disks' to
						 * allow for replacements.
						 */
	int			raid_disks;

	spinlock_t		device_lock;

	/* list of 'struct r1bio' that need to be processed by raid1d,
	 * whether to retry a read, write out a resync or recovery
	 * block, or anything else.
	 */
	struct list_head	retry_list;
	/* A separate list of r1bio which just need raid_end_bio_io called.
	 * This mustn't happen for writes which had any errors if the
	 * superblock needs to be written.
	 */
	struct list_head	bio_end_io_list;

	/* queue pending writes to be submitted on unplug */
	struct bio_list		pending_bio_list;

	/* for use when syncing mirrors:
	 * We don't allow both normal IO and resync/recovery IO at
	 * the same time - resync/recovery can only happen when there
	 * is no other IO.  So when either is active, the other has to wait.
	 * See the more detailed description near raise_barrier() in raid1.c.
	 */
	wait_queue_head_t	wait_barrier;
	spinlock_t		resync_lock;
	atomic_t		nr_sync_pending;
	atomic_t		*nr_pending;
	atomic_t		*nr_waiting;
	atomic_t		*nr_queued;
	atomic_t		*barrier;
	int			array_frozen;

	/* Set to 1 if a full sync is needed (e.g. a fresh device was
	 * added).  Cleared when a sync completes.
	 */
	int			fullsync;

	/* When the same as mddev->recovery_disabled, we don't allow
	 * recovery to be attempted, as we expect a read error.
	 */
	int			recovery_disabled;

	/* poolinfo contains information about the content of the
	 * mempools - it changes when the array grows or shrinks
	 */
	struct pool_info	*poolinfo;
	mempool_t		r1bio_pool;
	mempool_t		r1buf_pool;

	struct bio_set		bio_split;

	/* temporary buffer for synchronous IO when attempting to repair
	 * a read error
	 */
	struct page		*tmppage;

	/* When taking over an array from a different personality, we store
	 * the new thread here until we fully activate the array.
	 */
	struct md_thread __rcu	*thread;

	/* Keep track of cluster resync window to send to other
	 * nodes.
	 */
	sector_t		cluster_sync_low;
	sector_t		cluster_sync_high;

};

/*
 * this is our 'private' RAID1 bio.
 *
 * it contains information about what kind of IO operations were started
 * for this RAID1 operation, and about their status:
 */

struct r1bio {
	atomic_t		remaining;	/* 'have we finished' count,
						 * used from IRQ handlers
						 */
	atomic_t		behind_remaining; /* number of write-behind ios
						 * remaining in this BehindIO
						 * request
						 */
	sector_t		sector;
	int			sectors;
	unsigned long		state;
	unsigned long		start_time;
	struct mddev		*mddev;
	/*
	 * original bio going to /dev/mdx
	 */
	struct bio		*master_bio;
	/*
	 * if the IO is in READ direction, then this is where we read
	 */
	int			read_disk;

	struct list_head	retry_list;

	/*
	 * When R1BIO_BehindIO is set, we store pages for write behind
	 * in behind_master_bio.
	 */
	struct bio		*behind_master_bio;

	/*
	 * if the IO is in WRITE direction, then multiple bios are used.
	 * We choose the number when they are allocated.
	 */
	struct bio		*bios[];
	/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
};
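/*
 * Illustrative sketch of how the bios[] flexible array is sized (an
 * assumption for illustration, not the actual r1bio_pool_alloc() from
 * raid1.c): the mempool allocator sizes each r1bio in one piece from
 * pool_info.raid_disks, which is already doubled to cover replacement
 * devices, along the lines of:
 *
 *	struct pool_info *pi = data;
 *	struct r1bio *r1_bio;
 *
 *	r1_bio = kzalloc(struct_size(r1_bio, bios, pi->raid_disks),
 *			 gfp_flags);
 *
 * which is why no new fields may be added after bios[] above.
 */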
/* bits for r1bio.state */
enum r1bio_state {
	R1BIO_Uptodate,
	R1BIO_IsSync,
	R1BIO_Degraded,
	R1BIO_BehindIO,
	/* Set ReadError on bios that experience a read error so that
	 * raid1d knows what to do with them.
	 */
	R1BIO_ReadError,
	/* For write-behind requests, we call bi_end_io when
	 * the last non-write-behind device completes, provided
	 * any write was successful.  Otherwise we call when
	 * any write-behind write succeeds; if they all fail, we call
	 * with failure when the last write completes.
	 * Record that bi_end_io was called with this flag...
	 */
	R1BIO_Returned,
	/* If a write for this request means we can clear some
	 * known-bad-block records, we set this flag.
	 */
	R1BIO_MadeGood,
	R1BIO_WriteError,
	R1BIO_FailFast,
};

static inline int sector_to_idx(sector_t sector)
{
	return hash_long(sector >> BARRIER_UNIT_SECTOR_BITS,
			 BARRIER_BUCKETS_NR_BITS);
}
#endif
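/*
 * Worked example for sector_to_idx() (illustrative only; assumes
 * BARRIER_BUCKETS_NR_BITS == 10 as in the PAGE_SHIFT == 12 case above):
 * a request at sector 0x12345678 lies in barrier unit
 * 0x12345678 >> 17 = 0x91a, and hash_long(0x91a, 10) spreads that unit
 * number over the 1024 buckets.  All four counter arrays in r1conf are
 * indexed this way, so a given 64MB barrier unit always maps to the
 * same bucket and neighbouring units rarely contend on one counter.
 */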