xref: /linux/fs/btrfs/raid56.h (revision 1d5198dd08ac04b13a8b7539131baf0980998032)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 /*
3  * Copyright (C) 2012 Fusion-io  All rights reserved.
4  * Copyright (C) 2012 Intel Corp. All rights reserved.
5  */
6 
7 #ifndef BTRFS_RAID56_H
8 #define BTRFS_RAID56_H
9 
10 #include <linux/types.h>
11 #include <linux/list.h>
12 #include <linux/spinlock.h>
13 #include <linux/bio.h>
14 #include <linux/refcount.h>
15 #include <linux/workqueue.h>
16 #include "volumes.h"
17 
18 struct page;
19 struct sector_ptr;
20 struct btrfs_fs_info;
21 
/*
 * Kind of work a struct btrfs_raid_bio performs; stored in its ->operation
 * member.  The per-value notes below are read off the enumerator names only
 * -- NOTE(review): confirm against the users in the .c file.
 */
enum btrfs_rbio_ops {
	/* Write path. */
	BTRFS_RBIO_WRITE = 0,
	/* Read that has to rebuild data. */
	BTRFS_RBIO_READ_REBUILD = 1,
	/* Scrub of a parity stripe. */
	BTRFS_RBIO_PARITY_SCRUB = 2,
};
27 
28 struct btrfs_raid_bio {
29 	struct btrfs_io_context *bioc;
30 
31 	/*
32 	 * While we're doing RMW on a stripe we put it into a hash table so we
33 	 * can lock the stripe and merge more rbios into it.
34 	 */
35 	struct list_head hash_list;
36 
37 	/* LRU list for the stripe cache */
38 	struct list_head stripe_cache;
39 
40 	/* For scheduling work in the helper threads */
41 	struct work_struct work;
42 
43 	/*
44 	 * bio_list and bio_list_lock are used to add more bios into the stripe
45 	 * in hopes of avoiding the full RMW
46 	 */
47 	struct bio_list bio_list;
48 	spinlock_t bio_list_lock;
49 
50 	/*
51 	 * Also protected by the bio_list_lock, the plug list is used by the
52 	 * plugging code to collect partial bios while plugged.  The stripe
53 	 * locking code also uses it to hand off the stripe lock to the next
54 	 * pending IO.
55 	 */
56 	struct list_head plug_list;
57 
58 	/* Flags that tell us if it is safe to merge with this bio. */
59 	unsigned long flags;
60 
61 	/*
62 	 * Set if we're doing a parity rebuild for a read from higher up, which
63 	 * is handled differently from a parity rebuild as part of RMW.
64 	 */
65 	enum btrfs_rbio_ops operation;
66 
67 	/* How many pages there are for the full stripe including P/Q */
68 	u16 nr_pages;
69 
70 	/* How many sectors there are for the full stripe including P/Q */
71 	u16 nr_sectors;
72 
73 	/* Number of data stripes (no p/q) */
74 	u8 nr_data;
75 
76 	/* Number of all stripes (including P/Q) */
77 	u8 real_stripes;
78 
79 	/* How many pages there are for each stripe */
80 	u8 stripe_npages;
81 
82 	/* How many sectors there are for each stripe */
83 	u8 stripe_nsectors;
84 
85 	/* Stripe number that we're scrubbing  */
86 	u8 scrubp;
87 
88 	/*
89 	 * Size of all the bios in the bio_list.  This helps us decide if the
90 	 * rbio maps to a full stripe or not.
91 	 */
92 	int bio_list_bytes;
93 
94 	refcount_t refs;
95 
96 	atomic_t stripes_pending;
97 
98 	wait_queue_head_t io_wait;
99 
100 	/* Bitmap to record which horizontal stripe has data */
101 	unsigned long dbitmap;
102 
103 	/* Allocated with stripe_nsectors-many bits for finish_*() calls */
104 	unsigned long finish_pbitmap;
105 
106 	/*
107 	 * These are two arrays of pointers.  We allocate the rbio big enough
108 	 * to hold them both and setup their locations when the rbio is
109 	 * allocated.
110 	 */
111 
112 	/*
113 	 * Pointers to pages that we allocated for reading/writing stripes
114 	 * directly from the disk (including P/Q).
115 	 */
116 	struct page **stripe_pages;
117 
118 	/* Pointers to the sectors in the bio_list, for faster lookup */
119 	struct sector_ptr *bio_sectors;
120 
121 	/*
122 	 * For subpage support, we need to map each sector to above
123 	 * stripe_pages.
124 	 */
125 	struct sector_ptr *stripe_sectors;
126 
127 	/* Allocated with real_stripes-many pointers for finish_*() calls */
128 	void **finish_pointers;
129 
130 	/*
131 	 * The bitmap recording where IO errors happened.
132 	 * Each bit is corresponding to one sector in either bio_sectors[] or
133 	 * stripe_sectors[] array.
134 	 *
135 	 * The reason we don't use another bit in sector_ptr is, we have two
136 	 * arrays of sectors, and a lot of IO can use sectors in both arrays.
137 	 * Thus making it much harder to iterate.
138 	 */
139 	unsigned long *error_bitmap;
140 
141 	/*
142 	 * Checksum buffer if the rbio is for data.  The buffer should cover
143 	 * all data sectors (excluding P/Q sectors).
144 	 */
145 	u8 *csum_buf;
146 
147 	/*
148 	 * Each bit represents if the corresponding sector has data csum found.
149 	 * Should only cover data sectors (excluding P/Q sectors).
150 	 */
151 	unsigned long *csum_bitmap;
152 };
153 
154 /*
155  * For trace event usage only. Records useful debug info for each bio submitted
156  * by RAID56 to each physical device.
157  *
158  * No matter signed or not, (-1) is always the one indicating we can not grab
159  * the proper stripe number.
160  */
161 struct raid56_bio_trace_info {
162 	u64 devid;
163 
164 	/* The offset inside the stripe. (<= STRIPE_LEN) */
165 	u32 offset;
166 
167 	/*
168 	 * Stripe number.
169 	 * 0 is the first data stripe, and nr_data for P stripe,
170 	 * nr_data + 1 for Q stripe.
171 	 * >= real_stripes for
172 	 */
173 	u8 stripe_nr;
174 };
175 
176 static inline int nr_data_stripes(const struct btrfs_chunk_map *map)
177 {
178 	return map->num_stripes - btrfs_nr_parity_stripes(map->type);
179 }
180 
181 static inline int nr_bioc_data_stripes(const struct btrfs_io_context *bioc)
182 {
183 	return bioc->num_stripes - btrfs_nr_parity_stripes(bioc->map_type);
184 }
185 
/* Marker values for the P and Q stripes: the two highest u64 values. */
#define RAID5_P_STRIPE ((u64)-2)
#define RAID6_Q_STRIPE ((u64)-1)

/*
 * True when @x is RAID5_P_STRIPE or RAID6_Q_STRIPE.
 *
 * The two markers are the two largest u64 values, so one unsigned comparison
 * against the smaller of them matches exactly those two.  Unlike a pair of
 * equality tests this expands @x only once, so arguments with side effects
 * (e.g. map[i++]) are safe.  The cast performs the same conversion to u64
 * that comparing @x with a u64 constant would apply implicitly.
 */
#define is_parity_stripe(x) ((u64)(x) >= RAID5_P_STRIPE)
191 
192 struct btrfs_device;
193 
194 void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
195 			   int mirror_num);
196 void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
197 
198 struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
199 				struct btrfs_io_context *bioc,
200 				struct btrfs_device *scrub_dev,
201 				unsigned long *dbitmap, int stripe_nsectors);
202 void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
203 
204 void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
205 				    struct page **data_pages, u64 data_logical);
206 
207 int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
208 void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
209 
210 #endif
211