1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>. 24 */ 25 26 #ifndef _SYS_VDEV_RAIDZ_H 27 #define _SYS_VDEV_RAIDZ_H 28 29 #include <sys/types.h> 30 #include <sys/zfs_rlock.h> 31 32 #ifdef __cplusplus 33 extern "C" { 34 #endif 35 36 struct zio; 37 struct raidz_col; 38 struct raidz_row; 39 struct raidz_map; 40 struct vdev_raidz; 41 struct uberblock; 42 #if !defined(_KERNEL) 43 struct kernel_param {}; 44 #endif 45 46 /* 47 * vdev_raidz interface 48 */ 49 struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t, 50 uint64_t); 51 struct raidz_map *vdev_raidz_map_alloc_expanded(struct zio *, 52 uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, boolean_t); 53 void vdev_raidz_map_free(struct raidz_map *); 54 void vdev_raidz_free(struct vdev_raidz *); 55 void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *); 56 void vdev_raidz_generate_parity(struct raidz_map *); 57 void vdev_raidz_reconstruct(struct raidz_map *, const int *, int); 58 void vdev_raidz_child_done(zio_t *); 59 void vdev_raidz_io_done(zio_t *); 60 void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *); 61 struct raidz_row *vdev_raidz_row_alloc(int, zio_t *); 62 void vdev_raidz_reflow_copy_scratch(spa_t *); 63 void raidz_dtl_reassessed(vdev_t *); 64 65 extern const zio_vsd_ops_t vdev_raidz_vsd_ops; 66 67 /* 68 * vdev_raidz_math interface 69 */ 70 /* Required, but not used, by ZFS_MODULE_PARAM_CALL */ 71 extern uint32_t zfs_vdev_raidz_impl; 72 void vdev_raidz_math_init(void); 73 void vdev_raidz_math_fini(void); 74 const struct raidz_impl_ops *vdev_raidz_math_get_ops(void); 75 int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *); 76 int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *, 77 const int *, const int *, const int); 78 int vdev_raidz_impl_set(const char *); 79 int vdev_raidz_impl_get(char *buffer, size_t size); 80 81 typedef struct vdev_raidz_expand { 82 uint64_t vre_vdev_id; 83 84 kmutex_t vre_lock; 85 kcondvar_t vre_cv; 86 87 /* 88 * How much i/o is outstanding (issued and not completed). 89 */ 90 uint64_t vre_outstanding_bytes; 91 92 /* 93 * Next offset to issue i/o for. 94 */ 95 uint64_t vre_offset; 96 97 /* 98 * Lowest offset of a failed expansion i/o. The expansion will retry 99 * from here. Once the expansion thread notices the failure and exits, 100 * vre_failed_offset is reset back to UINT64_MAX, and 101 * vre_waiting_for_resilver will be set. 102 */ 103 uint64_t vre_failed_offset; 104 boolean_t vre_waiting_for_resilver; 105 106 /* 107 * Offset that is completing each txg 108 */ 109 uint64_t vre_offset_pertxg[TXG_SIZE]; 110 111 /* 112 * Bytes copied in each txg. 113 */ 114 uint64_t vre_bytes_copied_pertxg[TXG_SIZE]; 115 116 /* 117 * The rangelock prevents normal read/write zio's from happening while 118 * there are expansion (reflow) i/os in progress to the same offsets. 119 */ 120 zfs_rangelock_t vre_rangelock; 121 122 /* 123 * These fields are stored on-disk in the vdev_top_zap: 124 */ 125 dsl_scan_state_t vre_state; 126 uint64_t vre_start_time; 127 uint64_t vre_end_time; 128 uint64_t vre_bytes_copied; 129 } vdev_raidz_expand_t; 130 131 typedef struct vdev_raidz { 132 /* 133 * Number of child vdevs when this raidz vdev was created (i.e. before 134 * any raidz expansions). 135 */ 136 int vd_original_width; 137 138 /* 139 * The current number of child vdevs, which may be more than the 140 * original width if an expansion is in progress or has completed. 141 */ 142 int vd_physical_width; 143 144 int vd_nparity; 145 146 /* 147 * Tree of reflow_node_t's. The lock protects the avl tree only. 148 * The reflow_node_t's describe completed expansions, and are used 149 * to determine the logical width given a block's birth time. 150 */ 151 avl_tree_t vd_expand_txgs; 152 kmutex_t vd_expand_lock; 153 154 /* 155 * If this vdev is being expanded, spa_raidz_expand is set to this 156 */ 157 vdev_raidz_expand_t vn_vre; 158 } vdev_raidz_t; 159 160 extern int vdev_raidz_attach_check(vdev_t *); 161 extern void vdev_raidz_attach_sync(void *, dmu_tx_t *); 162 extern void spa_start_raidz_expansion_thread(spa_t *); 163 extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *); 164 extern int vdev_raidz_load(vdev_t *); 165 166 /* RAIDZ scratch area pause points (for testing) */ 167 #define RAIDZ_EXPAND_PAUSE_NONE 0 168 #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1 1 169 #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2 2 170 #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3 3 171 #define RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4 172 #define RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5 173 #define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6 174 #define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7 175 176 #ifdef __cplusplus 177 } 178 #endif 179 180 #endif /* _SYS_VDEV_RAIDZ_H */ 181