xref: /freebsd/sys/contrib/openzfs/include/sys/vdev_raidz.h (revision dd32d6b29d49838c99d38ba30846ade210b2e6f7)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>.
24  */
25 
26 #ifndef _SYS_VDEV_RAIDZ_H
27 #define	_SYS_VDEV_RAIDZ_H
28 
29 #include <sys/types.h>
30 #include <sys/zfs_rlock.h>
31 
32 #ifdef	__cplusplus
33 extern "C" {
34 #endif
35 
/*
 * Forward declarations so the prototypes below can take pointers to these
 * types before (or without) seeing their full definitions.
 * NOTE(review): struct uberblock is not referenced anywhere else in this
 * header as shown; it may be a leftover -- confirm before removing.
 */
36 struct zio;
37 struct raidz_col;
38 struct raidz_row;
39 struct raidz_map;
40 struct vdev_raidz;
41 struct uberblock;
/*
 * Builds without _KERNEL get an empty placeholder definition of
 * struct kernel_param (an empty struct is a GNU C extension; it is valid
 * C++).  Nothing else in this header uses the type.
 * NOTE(review): presumably it lets module-parameter handler prototypes
 * compile in userland -- confirm against the users.
 */
42 #if !defined(_KERNEL)
43 struct kernel_param {};
44 #endif
45 
46 /*
47  * vdev_raidz interface
 *
 * Parameters are left unnamed in these prototypes; consult the
 * definitions for the meaning of each argument.
 *
 * NOTE(review): zio_t, abd_t, spa_t, vdev_t, zio_flag_t and zio_vsd_ops_t
 * are not declared in this header.  Includers presumably need their
 * definitions in scope already (through the two headers included above
 * or through earlier includes) -- confirm.
48  */
49 struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t,
50     uint64_t);
51 struct raidz_map *vdev_raidz_map_alloc_expanded(struct zio *,
52     uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, boolean_t);
53 void vdev_raidz_map_free(struct raidz_map *);
54 void vdev_raidz_free(struct vdev_raidz *);
55 void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *);
56 void vdev_raidz_generate_parity(struct raidz_map *);
57 void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
58 void vdev_raidz_child_done(zio_t *);
59 void vdev_raidz_io_done(zio_t *);
60 void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *);
61 struct raidz_row *vdev_raidz_row_alloc(int, zio_t *);
62 void vdev_raidz_reflow_copy_scratch(spa_t *);
63 void raidz_dtl_reassessed(vdev_t *);
64 boolean_t vdev_sit_out_reads(vdev_t *, zio_flag_t);
65 void vdev_raidz_sit_child(vdev_t *, uint64_t);
66 void vdev_raidz_unsit_child(vdev_t *);
67 
/* Declaration only; the definition of this object lives outside this header. */
68 extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
69 
70 /*
71  * vdev_raidz_math interface
 *
 * struct raidz_impl_ops has no forward declaration earlier in this header;
 * its first mention is the return type of vdev_raidz_math_get_ops() below,
 * which introduces the tag at file scope as an incomplete type.
72  */
73 /* Required, but not used, by ZFS_MODULE_PARAM_CALL */
74 extern uint32_t zfs_vdev_raidz_impl;
75 void vdev_raidz_math_init(void);
76 void vdev_raidz_math_fini(void);
77 const struct raidz_impl_ops *vdev_raidz_math_get_ops(void);
78 int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *);
79 int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *,
80     const int *, const int *, const int);
81 int vdev_raidz_impl_set(const char *);
/*
 * NOTE(review): presumably writes a description of the selected
 * implementation into buffer, bounded by size -- confirm against the
 * definition.
 */
82 int vdev_raidz_impl_get(char *buffer, size_t size);
83 
/*
 * State of a raidz expansion (reflow): synchronization objects, accounting
 * for outstanding and per-txg copied i/o, the rangelock that keeps normal
 * zios away from offsets being reflowed, and the progress fields that are
 * persisted in the vdev_top_zap.
 *
 * The member order is part of the structure layout; do not reorder.
 */
84 typedef struct vdev_raidz_expand {
	/*
	 * NOTE(review): presumably the id of the raidz vdev being
	 * expanded -- confirm against the users.
	 */
85 	uint64_t vre_vdev_id;
86 
	/*
	 * NOTE(review): this header does not say which members vre_lock
	 * protects or what vre_cv signals -- see the users.
	 */
87 	kmutex_t vre_lock;
88 	kcondvar_t vre_cv;
89 
90 	/*
91 	 * How much i/o is outstanding (issued and not completed).
92 	 */
93 	uint64_t vre_outstanding_bytes;
94 
95 	/*
96 	 * Next offset to issue i/o for.
97 	 */
98 	uint64_t vre_offset;
99 
100 	/*
101 	 * Lowest offset of a failed expansion i/o.  The expansion will retry
102 	 * from here.  Once the expansion thread notices the failure and exits,
103 	 * vre_failed_offset is reset back to UINT64_MAX, and
104 	 * vre_waiting_for_resilver will be set.
105 	 */
106 	uint64_t vre_failed_offset;
107 	boolean_t vre_waiting_for_resilver;
108 
109 	/*
110 	 * Offset that is completing each txg.
	 * One slot per in-flight txg (TXG_SIZE entries).
111 	 */
112 	uint64_t vre_offset_pertxg[TXG_SIZE];
113 
114 	/*
115 	 * Bytes copied in each txg.
	 * One slot per in-flight txg (TXG_SIZE entries).
116 	 */
117 	uint64_t vre_bytes_copied_pertxg[TXG_SIZE];
118 
119 	/*
120 	 * The rangelock prevents normal read/write zio's from happening while
121 	 * there are expansion (reflow) i/os in progress to the same offsets.
122 	 */
123 	zfs_rangelock_t vre_rangelock;
124 
125 	/*
126 	 * These fields are stored on-disk in the vdev_top_zap:
127 	 */
128 	dsl_scan_state_t vre_state;
129 	uint64_t vre_start_time;
130 	uint64_t vre_end_time;
131 	uint64_t vre_bytes_copied;
132 } vdev_raidz_expand_t;
133 
/*
 * Per-raidz-vdev state: the original and current child counts, the tree
 * of completed expansions, and the embedded expansion state.
 *
 * The member order is part of the structure layout; do not reorder.
 */
134 typedef struct vdev_raidz {
135 	/*
136 	 * Number of child vdevs when this raidz vdev was created (i.e. before
137 	 * any raidz expansions).
138 	 */
139 	int vd_original_width;
140 
141 	/*
142 	 * The current number of child vdevs, which may be more than the
143 	 * original width if an expansion is in progress or has completed.
144 	 */
145 	int vd_physical_width;
146 
	/*
	 * NOTE(review): presumably the number of parity columns -- confirm
	 * against the users.
	 */
147 	int vd_nparity;
148 
149 	/*
150 	 * Tree of reflow_node_t's.  The lock protects the avl tree only.
151 	 * The reflow_node_t's describe completed expansions, and are used
152 	 * to determine the logical width given a block's birth time.
153 	 */
154 	avl_tree_t vd_expand_txgs;
155 	kmutex_t vd_expand_lock;
156 
157 	/*
158 	 * If this vdev is being expanded, spa_raidz_expand is set to this
	 *
	 * NOTE(review): this member uses a vn_ prefix while every other
	 * member of the struct uses vd_; renaming it would break users.
159 	 */
160 	vdev_raidz_expand_t vn_vre;
161 } vdev_raidz_t;
162 
/*
 * Attach, expansion and load entry points.  These carry an explicit
 * `extern`, which is redundant on function declarations in C; the
 * prototypes earlier in this header omit it.
 *
 * NOTE(review): dmu_tx_t and pool_raidz_expand_stat_t are not declared in
 * this header; includers presumably have them in scope already -- confirm.
 */
163 extern int vdev_raidz_attach_check(vdev_t *);
164 extern void vdev_raidz_attach_sync(void *, dmu_tx_t *);
165 extern void spa_start_raidz_expansion_thread(spa_t *);
166 extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *);
167 extern int vdev_raidz_load(vdev_t *);
168 
169 /* RAIDZ scratch area pause points (for testing) */
/*
 * The values are consecutive integers, with 0 (NONE) meaning no pause
 * point.
 * NOTE(review): presumably compared against a test tunable to stop the
 * expansion at the corresponding step -- confirm against the users.
 */
170 #define	RAIDZ_EXPAND_PAUSE_NONE	0
171 #define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1 1
172 #define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2 2
173 #define	RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3 3
174 #define	RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4
175 #define	RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5
176 #define	RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6
177 #define	RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7
178 
179 #ifdef	__cplusplus
180 }
181 #endif
182 
183 #endif /* _SYS_VDEV_RAIDZ_H */
184