xref: /linux/drivers/md/raid10.h (revision 37a93dd5c49b5fda807fd204edf2547c3493319c)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _RAID10_H
3 #define _RAID10_H
4 
/* Note: raid10_info.rdev can be set to NULL asynchronously by
 * raid10_remove_disk.
 * There are three safe ways to access raid10_info.rdev.
 * 1/ when holding mddev->reconfig_mutex
 * 2/ when resync/recovery/reshape is known to be happening - i.e. in code
 *    that is called as part of performing resync/recovery/reshape.
 * 3/ while holding rcu_read_lock(), use rcu_dereference to get the pointer
 *    and if it is non-NULL, increment rdev->nr_pending before dropping the
 *    RCU lock.
 * When .rdev is set to NULL, the nr_pending count is checked again and if it
 * has been incremented, the pointer is put back in .rdev.
 */
17 
18 struct raid10_info {
19 	struct md_rdev	*rdev, *replacement;
20 	sector_t	head_position;
21 };
22 
23 struct r10conf {
24 	struct mddev		*mddev;
25 	struct raid10_info	*mirrors;
26 	struct raid10_info	*mirrors_new, *mirrors_old;
27 	spinlock_t		device_lock;
28 
29 	/* geometry */
30 	struct geom {
31 		int		raid_disks;
32 		int		near_copies;  /* number of copies laid out
33 					       * raid0 style */
34 		int		far_copies;   /* number of copies laid out
35 					       * at large strides across drives
36 					       */
37 		int		far_offset;   /* far_copies are offset by 1
38 					       * stripe instead of many
39 					       */
40 		sector_t	stride;	      /* distance between far copies.
41 					       * This is size / far_copies unless
42 					       * far_offset, in which case it is
43 					       * 1 stripe.
44 					       */
45 		int             far_set_size; /* The number of devices in a set,
46 					       * where a 'set' are devices that
47 					       * contain far/offset copies of
48 					       * each other.
49 					       */
50 		int		chunk_shift; /* shift from chunks to sectors */
51 		sector_t	chunk_mask;
52 	} prev, geo;
53 	int			copies;	      /* near_copies * far_copies.
54 					       * must be <= raid_disks
55 					       */
56 
57 	sector_t		dev_sectors;  /* temp copy of
58 					       * mddev->dev_sectors */
59 	sector_t		reshape_progress;
60 	sector_t		reshape_safe;
61 	unsigned long		reshape_checkpoint;
62 	sector_t		offset_diff;
63 
64 	struct list_head	retry_list;
65 	/* A separate list of r1bio which just need raid_end_bio_io called.
66 	 * This mustn't happen for writes which had any errors if the superblock
67 	 * needs to be written.
68 	 */
69 	struct list_head	bio_end_io_list;
70 
71 	/* queue pending writes and submit them on unplug */
72 	struct bio_list		pending_bio_list;
73 
74 	seqlock_t		resync_lock;
75 	atomic_t		nr_pending;
76 	int			nr_waiting;
77 	int			nr_queued;
78 	int			barrier;
79 	int			array_freeze_pending;
80 	sector_t		next_resync;
81 	int			fullsync;  /* set to 1 if a full sync is needed,
82 					    * (fresh device added).
83 					    * Cleared when a sync completes.
84 					    */
85 	int			have_replacement; /* There is at least one
86 						   * replacement device.
87 						   */
88 	wait_queue_head_t	wait_barrier;
89 
90 	mempool_t		r10bio_pool;
91 	mempool_t		r10buf_pool;
92 	struct page		*tmppage;
93 	struct bio_set		bio_split;
94 
95 	/* When taking over an array from a different personality, we store
96 	 * the new thread here until we fully activate the array.
97 	 */
98 	struct md_thread __rcu	*thread;
99 
100 	/*
101 	 * Keep track of cluster resync window to send to other nodes.
102 	 */
103 	sector_t		cluster_sync_low;
104 	sector_t		cluster_sync_high;
105 };
106 
107 /*
108  * this is our 'private' RAID10 bio.
109  *
110  * it contains information about what kind of IO operations were started
111  * for this RAID10 operation, and about their status:
112  */
113 
114 struct r10bio {
115 	atomic_t		remaining; /* 'have we finished' count,
116 					    * used from IRQ handlers
117 					    */
118 	sector_t		sector;	/* virtual sector number */
119 	int			sectors;
120 	unsigned long		state;
121 	struct mddev		*mddev;
122 	/*
123 	 * original bio going to /dev/mdx
124 	 */
125 	struct bio		*master_bio;
126 	/*
127 	 * if the IO is in READ direction, then this is where we read
128 	 */
129 	int			read_slot;
130 
131 	struct list_head	retry_list;
132 	/*
133 	 * if the IO is in WRITE direction, then multiple bios are used,
134 	 * one for each copy.
135 	 * When resyncing we also use one for each copy.
136 	 * When reconstructing, we use 2 bios, one for read, one for write.
137 	 * We choose the number when they are allocated.
138 	 * We sometimes need an extra bio to write to the replacement.
139 	 */
140 	struct r10dev {
141 		struct bio	*bio;
142 		union {
143 			struct bio	*repl_bio; /* used for resync and
144 						    * writes */
145 			struct md_rdev	*rdev;	   /* used for reads
146 						    * (read_slot >= 0) */
147 		};
148 		sector_t	addr;
149 		int		devnum;
150 	} devs[];
151 };
152 
/* Bit numbers used in r10bio.state. */
enum r10bio_state {
	R10BIO_Uptodate,
	R10BIO_IsSync,
	R10BIO_IsRecover,
	R10BIO_IsReshape,
	/*
	 * ReadError is set on bios that experience a read error
	 * so that raid10d knows what to do with them.
	 */
	R10BIO_ReadError,
	/* For bio_split errors: records that bi_end_io was called. */
	R10BIO_Returned,
	/*
	 * Set when a write for this request means some
	 * known-bad-block records can be cleared.
	 */
	R10BIO_MadeGood,
	R10BIO_WriteError,
	/*
	 * Set when, during a reshape, the IO is being performed on
	 * the 'previous' part of the array.
	 */
	R10BIO_Previous,
	/* failfast devices did receive failfast requests. */
	R10BIO_FailFast,
	R10BIO_Discard,
};
179 #endif
180