1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (C) 2016 Gvozden Neskovic <neskovic@compeng.uni-frankfurt.de>. 23 */ 24 25 #ifndef _SYS_VDEV_RAIDZ_H 26 #define _SYS_VDEV_RAIDZ_H 27 28 #include <sys/types.h> 29 #include <sys/zfs_rlock.h> 30 31 #ifdef __cplusplus 32 extern "C" { 33 #endif 34 35 struct zio; 36 struct raidz_col; 37 struct raidz_row; 38 struct raidz_map; 39 struct vdev_raidz; 40 struct uberblock; 41 #if !defined(_KERNEL) 42 struct kernel_param {}; 43 #endif 44 45 /* 46 * vdev_raidz interface 47 */ 48 struct raidz_map *vdev_raidz_map_alloc(struct zio *, uint64_t, uint64_t, 49 uint64_t); 50 struct raidz_map *vdev_raidz_map_alloc_expanded(struct zio *, 51 uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, uint64_t, boolean_t); 52 void vdev_raidz_map_free(struct raidz_map *); 53 void vdev_raidz_free(struct vdev_raidz *); 54 void vdev_raidz_generate_parity_row(struct raidz_map *, struct raidz_row *); 55 void vdev_raidz_generate_parity(struct raidz_map *); 56 void vdev_raidz_reconstruct(struct raidz_map *, const int *, int); 57 void vdev_raidz_child_done(zio_t *); 58 void vdev_raidz_io_done(zio_t *); 59 void vdev_raidz_checksum_error(zio_t *, struct raidz_col *, abd_t *); 60 struct raidz_row *vdev_raidz_row_alloc(int, zio_t *); 61 void vdev_raidz_reflow_copy_scratch(spa_t *); 62 void raidz_dtl_reassessed(vdev_t *); 63 64 extern const zio_vsd_ops_t vdev_raidz_vsd_ops; 65 66 /* 67 * vdev_raidz_math interface 68 */ 69 void vdev_raidz_math_init(void); 70 void vdev_raidz_math_fini(void); 71 const struct raidz_impl_ops *vdev_raidz_math_get_ops(void); 72 int vdev_raidz_math_generate(struct raidz_map *, struct raidz_row *); 73 int vdev_raidz_math_reconstruct(struct raidz_map *, struct raidz_row *, 74 const int *, const int *, const int); 75 int vdev_raidz_impl_set(const char *); 76 77 typedef struct vdev_raidz_expand { 78 uint64_t vre_vdev_id; 79 80 kmutex_t vre_lock; 81 kcondvar_t vre_cv; 82 83 /* 84 * How much i/o is outstanding (issued and not completed). 85 */ 86 uint64_t vre_outstanding_bytes; 87 88 /* 89 * Next offset to issue i/o for. 90 */ 91 uint64_t vre_offset; 92 93 /* 94 * Lowest offset of a failed expansion i/o. The expansion will retry 95 * from here. Once the expansion thread notices the failure and exits, 96 * vre_failed_offset is reset back to UINT64_MAX, and 97 * vre_waiting_for_resilver will be set. 98 */ 99 uint64_t vre_failed_offset; 100 boolean_t vre_waiting_for_resilver; 101 102 /* 103 * Offset that is completing each txg 104 */ 105 uint64_t vre_offset_pertxg[TXG_SIZE]; 106 107 /* 108 * Bytes copied in each txg. 109 */ 110 uint64_t vre_bytes_copied_pertxg[TXG_SIZE]; 111 112 /* 113 * The rangelock prevents normal read/write zio's from happening while 114 * there are expansion (reflow) i/os in progress to the same offsets. 115 */ 116 zfs_rangelock_t vre_rangelock; 117 118 /* 119 * These fields are stored on-disk in the vdev_top_zap: 120 */ 121 dsl_scan_state_t vre_state; 122 uint64_t vre_start_time; 123 uint64_t vre_end_time; 124 uint64_t vre_bytes_copied; 125 } vdev_raidz_expand_t; 126 127 typedef struct vdev_raidz { 128 /* 129 * Number of child vdevs when this raidz vdev was created (i.e. before 130 * any raidz expansions). 131 */ 132 int vd_original_width; 133 134 /* 135 * The current number of child vdevs, which may be more than the 136 * original width if an expansion is in progress or has completed. 137 */ 138 int vd_physical_width; 139 140 int vd_nparity; 141 142 /* 143 * Tree of reflow_node_t's. The lock protects the avl tree only. 144 * The reflow_node_t's describe completed expansions, and are used 145 * to determine the logical width given a block's birth time. 146 */ 147 avl_tree_t vd_expand_txgs; 148 kmutex_t vd_expand_lock; 149 150 /* 151 * If this vdev is being expanded, spa_raidz_expand is set to this 152 */ 153 vdev_raidz_expand_t vn_vre; 154 } vdev_raidz_t; 155 156 extern int vdev_raidz_attach_check(vdev_t *); 157 extern void vdev_raidz_attach_sync(void *, dmu_tx_t *); 158 extern void spa_start_raidz_expansion_thread(spa_t *); 159 extern int spa_raidz_expand_get_stats(spa_t *, pool_raidz_expand_stat_t *); 160 extern int vdev_raidz_load(vdev_t *); 161 162 /* RAIDZ scratch area pause points (for testing) */ 163 #define RAIDZ_EXPAND_PAUSE_NONE 0 164 #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_1 1 165 #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_2 2 166 #define RAIDZ_EXPAND_PAUSE_PRE_SCRATCH_3 3 167 #define RAIDZ_EXPAND_PAUSE_SCRATCH_VALID 4 168 #define RAIDZ_EXPAND_PAUSE_SCRATCH_REFLOWED 5 169 #define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_1 6 170 #define RAIDZ_EXPAND_PAUSE_SCRATCH_POST_REFLOW_2 7 171 172 #ifdef __cplusplus 173 } 174 #endif 175 176 #endif /* _SYS_VDEV_RAIDZ_H */ 177