1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>. 23 */ 24 25 #ifndef _SYS_VDEV_RAIDZ_H 26 #define _SYS_VDEV_RAIDZ_H 27 28 #include <sys/types.h> 29 #include <sys/zfs_rlock.h> 30 31 #ifdef __cplusplus 32 extern "C" { 33 #endif 34 35 struct zio; 36 struct raidz_col; 37 struct raidz_row; 38 struct raidz_map; 39 struct vdev_raidz; 40 struct uberblock; 41 #if !defined(_KERNEL) 42 struct kernel_param {}; 43 #endif 44 45 /* 46 * vdev_raidz interface 47 */ 48 struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t, 49 uint64_t); 50 struct raidz_map *vdev_raidz_map_alloc_expanded(struct zio *, 51 uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, boolean_t); 52 void vdev_raidz_map_free(struct raidz_map *); 53 void vdev_raidz_free(struct vdev_raidz *); 54 void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *); 55 void vdev_raidz_generate_parity(struct raidz_map *); 56 void vdev_raidz_reconstruct(struct raidz_map *, const int *, int); 57 void vdev_raidz_child_done(zio_t *); 58 void vdev_raidz_io_done(zio_t *); 59 void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *); 60 struct raidz_row *vdev_raidz_row_alloc(int, zio_t *); 61 void vdev_raidz_reflow_copy_scratch(spa_t *); 62 void raidz_dtl_reassessed(vdev_t *); 63 64 extern const zio_vsd_ops_t vdev_raidz_vsd_ops; 65 66 /* 67 * vdev_raidz_math interface 68 */ 69 /* Required, but not used, by ZFS_MODULE_PARAM_CALL */ 70 extern uint32_t zfs_vdev_raidz_impl; 71 void vdev_raidz_math_init(void); 72 void vdev_raidz_math_fini(void); 73 const struct raidz_impl_ops *vdev_raidz_math_get_ops(void); 74 int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *); 75 int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *, 76 const int *, const int *, const int); 77 int vdev_raidz_impl_set(const char *); 78 int vdev_raidz_impl_get(char *buffer, size_t size); 79 80 typedef struct vdev_raidz_expand { 81 uint64_t vre_vdev_id; 82 83 kmutex_t vre_lock; 84 kcondvar_t vre_cv; 85 86 /* 87 * How much i/o is outstanding (issued and not completed). 88 */ 89 uint64_t vre_outstanding_bytes; 90 91 /* 92 * Next offset to issue i/o for. 93 */ 94 uint64_t vre_offset; 95 96 /* 97 * Lowest offset of a failed expansion i/o. The expansion will retry 98 * from here. Once the expansion thread notices the failure and exits, 99 * vre_failed_offset is reset back to UINT64_MAX, and 100 * vre_waiting_for_resilver will be set. 101 */ 102 uint64_t vre_failed_offset; 103 boolean_t vre_waiting_for_resilver; 104 105 /* 106 * Offset that is completing each txg 107 */ 108 uint64_t vre_offset_pertxg[TXG_SIZE]; 109 110 /* 111 * Bytes copied in each txg. 112 */ 113 uint64_t vre_bytes_copied_pertxg[TXG_SIZE]; 114 115 /* 116 * The rangelock prevents normal read/write zio's from happening while 117 * there are expansion (reflow) i/os in progress to the same offsets. 118 */ 119 zfs_rangelock_t vre_rangelock; 120 121 /* 122 * These fields are stored on-disk in the vdev_top_zap: 123 */ 124 dsl_scan_state_t vre_state; 125 uint64_t vre_start_time; 126 uint64_t vre_end_time; 127 uint64_t vre_bytes_copied; 128 } vdev_raidz_expand_t; 129 130 typedef struct vdev_raidz { 131 /* 132 * Number of child vdevs when this raidz vdev was created (i.e. before 133 * any raidz expansions). 134 */ 135 int vd_original_width; 136 137 /* 138 * The current number of child vdevs, which may be more than the 139 * original width if an expansion is in progress or has completed. 140 */ 141 int vd_physical_width; 142 143 int vd_nparity; 144 145 /* 146 * Tree of reflow_node_t's. The lock protects the avl tree only. 147 * The reflow_node_t's describe completed expansions, and are used 148 * to determine the logical width given a block's birth time. 149 */ 150 avl_tree_t vd_expand_txgs; 151 kmutex_t vd_expand_lock; 152 153 /* 154 * If this vdev is being expanded, spa_raidz_expand is set to this 155 */ 156 vdev_raidz_expand_t vn_vre; 157 } vdev_raidz_t; 158 159 extern int vdev_raidz_attach_check(vdev_t *); 160 extern void vdev_raidz_attach_sync(void *, dmu_tx_t *); 161 extern void spa_start_raidz_expansion_thread(spa_t *); 162 extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *); 163 extern int vdev_raidz_load(vdev_t *); 164 165 /* RAIDZ scratch area pause points (for testing) */ 166 #define RAIDZ_EXPAND_PAUSE_NONE 0 167 #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1 1 168 #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2 2 169 #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3 3 170 #define RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4 171 #define RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5 172 #define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6 173 #define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7 174 175 #ifdef __cplusplus 176 } 177 #endif 178 179 #endif /* _SYS_VDEV_RAIDZ_H */ 180