/*-
 * Copyright (c) 2004 Pawel Jakub Dawidek <pjd@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _G_RAID3_H_
#define _G_RAID3_H_

#include <sys/endian.h>
#include <sys/md5.h>

#define G_RAID3_CLASS_NAME      "RAID3"

#define G_RAID3_MAGIC           "GEOM::RAID3"
/*
 * Version history:
 * 0 - Initial version number.
 * 1 - Added 'round-robin reading' algorithm.
 * 2 - Added 'verify reading' algorithm.
 * 3 - Added md_genid field to metadata.
 */
#define G_RAID3_VERSION         3

#define G_RAID3_DISK_FLAG_DIRTY         0x0000000000000001ULL
#define G_RAID3_DISK_FLAG_SYNCHRONIZING 0x0000000000000002ULL
#define G_RAID3_DISK_FLAG_FORCE_SYNC    0x0000000000000004ULL
#define G_RAID3_DISK_FLAG_HARDCODED     0x0000000000000008ULL
#define G_RAID3_DISK_FLAG_MASK          (G_RAID3_DISK_FLAG_DIRTY |         \
                                         G_RAID3_DISK_FLAG_SYNCHRONIZING | \
                                         G_RAID3_DISK_FLAG_FORCE_SYNC)

#define G_RAID3_DEVICE_FLAG_NOAUTOSYNC  0x0000000000000001ULL
#define G_RAID3_DEVICE_FLAG_ROUND_ROBIN 0x0000000000000002ULL
#define G_RAID3_DEVICE_FLAG_VERIFY      0x0000000000000004ULL
#define G_RAID3_DEVICE_FLAG_MASK        (G_RAID3_DEVICE_FLAG_NOAUTOSYNC |  \
                                         G_RAID3_DEVICE_FLAG_ROUND_ROBIN | \
                                         G_RAID3_DEVICE_FLAG_VERIFY)

#ifdef _KERNEL
extern u_int g_raid3_debug;

#define G_RAID3_DEBUG(lvl, ...) do {                                    \
        if (g_raid3_debug >= (lvl)) {                                   \
                printf("GEOM_RAID3");                                   \
                if (g_raid3_debug > 0)                                  \
                        printf("[%u]", lvl);                            \
                printf(": ");                                           \
                printf(__VA_ARGS__);                                    \
                printf("\n");                                           \
        }                                                               \
} while (0)
#define G_RAID3_LOGREQ(lvl, bp, ...) do {                               \
        if (g_raid3_debug >= (lvl)) {                                   \
                printf("GEOM_RAID3");                                   \
                if (g_raid3_debug > 0)                                  \
                        printf("[%u]", lvl);                            \
                printf(": ");                                           \
                printf(__VA_ARGS__);                                    \
                printf(" ");                                            \
                g_print_bio(bp);                                        \
                printf("\n");                                           \
        }                                                               \
} while (0)
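
/*
 * Example usage (illustrative only; the messages and level values below are
 * made up):
 *
 *      G_RAID3_DEBUG(0, "Device %s created.", sc->sc_name);
 *      G_RAID3_LOGREQ(2, bp, "Request received:");
 *
 * Both macros take a printf-style format and print only when the global
 * g_raid3_debug level is at least 'lvl'; G_RAID3_LOGREQ additionally prints
 * the given bio via g_print_bio().
 */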

#define G_RAID3_BIO_CFLAG_REGULAR       0x01
#define G_RAID3_BIO_CFLAG_SYNC          0x02
#define G_RAID3_BIO_CFLAG_PARITY        0x04
#define G_RAID3_BIO_CFLAG_NODISK        0x08
#define G_RAID3_BIO_CFLAG_REGSYNC       0x10
#define G_RAID3_BIO_CFLAG_MASK          (G_RAID3_BIO_CFLAG_REGULAR |    \
                                         G_RAID3_BIO_CFLAG_SYNC |       \
                                         G_RAID3_BIO_CFLAG_PARITY |     \
                                         G_RAID3_BIO_CFLAG_NODISK |     \
                                         G_RAID3_BIO_CFLAG_REGSYNC)

#define G_RAID3_BIO_PFLAG_DEGRADED      0x01
#define G_RAID3_BIO_PFLAG_NOPARITY      0x02
#define G_RAID3_BIO_PFLAG_VERIFY        0x04
#define G_RAID3_BIO_PFLAG_MASK          (G_RAID3_BIO_PFLAG_DEGRADED |   \
                                         G_RAID3_BIO_PFLAG_NOPARITY |   \
                                         G_RAID3_BIO_PFLAG_VERIFY)

/*
 * Information needed for synchronization of a single disk.
 */
struct g_raid3_disk_sync {
        struct g_consumer *ds_consumer; /* Consumer connected to our device. */
        off_t            ds_offset;     /* Offset of next request to send. */
        off_t            ds_offset_done; /* Offset of already synchronized
                                            region. */
        off_t            ds_resync;     /* Resynchronize from this offset. */
        u_int            ds_syncid;     /* Disk's synchronization ID. */
        u_char          *ds_data;
};

/*
 * Information needed for synchronization of the whole device.
 */
struct g_raid3_device_sync {
        struct g_geom   *ds_geom;       /* Synchronization geom. */
};

#define G_RAID3_DISK_STATE_NODISK               0
#define G_RAID3_DISK_STATE_NONE                 1
#define G_RAID3_DISK_STATE_NEW                  2
#define G_RAID3_DISK_STATE_ACTIVE               3
#define G_RAID3_DISK_STATE_STALE                4
#define G_RAID3_DISK_STATE_SYNCHRONIZING        5
#define G_RAID3_DISK_STATE_DISCONNECTED         6
#define G_RAID3_DISK_STATE_DESTROY              7
struct g_raid3_disk {
        u_int            d_no;          /* Disk number. */
        struct g_consumer *d_consumer;  /* Consumer. */
        struct g_raid3_softc *d_softc;  /* Back-pointer to softc. */
        int              d_state;       /* Disk state. */
        uint64_t         d_flags;       /* Additional flags. */
        u_int            d_genid;       /* Disk's generation ID. */
        struct g_raid3_disk_sync d_sync; /* Sync information. */
        LIST_ENTRY(g_raid3_disk) d_next;
};
/* Convenience alias: disk->d_name is the name of the underlying provider. */
#define d_name  d_consumer->provider->name

#define G_RAID3_EVENT_DONTWAIT  0x1
#define G_RAID3_EVENT_WAIT      0x2
#define G_RAID3_EVENT_DEVICE    0x4
#define G_RAID3_EVENT_DONE      0x8
struct g_raid3_event {
        struct g_raid3_disk     *e_disk;
        int                      e_state;
        int                      e_flags;
        int                      e_error;
        TAILQ_ENTRY(g_raid3_event) e_next;
};

#define G_RAID3_DEVICE_FLAG_DESTROY     0x0100000000000000ULL
#define G_RAID3_DEVICE_FLAG_WAIT        0x0200000000000000ULL

#define G_RAID3_DEVICE_STATE_STARTING   0
#define G_RAID3_DEVICE_STATE_DEGRADED   1
#define G_RAID3_DEVICE_STATE_COMPLETE   2

/* Bump syncid on first write. */
#define G_RAID3_BUMP_SYNCID     0x1
/* Bump genid immediately. */
#define G_RAID3_BUMP_GENID      0x2

struct g_raid3_softc {
        u_int           sc_state;       /* Device state. */
        uint64_t        sc_mediasize;   /* Device size. */
        uint32_t        sc_sectorsize;  /* Sector size. */
        uint64_t        sc_flags;       /* Additional flags. */

        struct g_geom   *sc_geom;
        struct g_provider *sc_provider;

        uint32_t        sc_id;          /* Device unique ID. */

        struct bio_queue_head sc_queue;
        struct mtx      sc_queue_mtx;
        struct proc     *sc_worker;

        struct g_raid3_disk *sc_disks;
        u_int           sc_ndisks;      /* Number of disks. */
        u_int           sc_round_robin;
        struct g_raid3_disk *sc_syncdisk;

        uma_zone_t      sc_zone_64k;
        uma_zone_t      sc_zone_16k;
        uma_zone_t      sc_zone_4k;

        u_int           sc_genid;       /* Generation ID. */
        u_int           sc_syncid;      /* Synchronization ID. */
        int             sc_bump_id;
        struct g_raid3_device_sync sc_sync;
        int             sc_idle;        /* DIRTY flags removed. */

        TAILQ_HEAD(, g_raid3_event) sc_events;
        struct mtx      sc_events_mtx;

        struct callout  sc_callout;
};
/* Convenience alias: sc->sc_name is the name of the device's geom. */
#define sc_name sc_geom->name

const char *g_raid3_get_diskname(struct g_raid3_disk *disk);
u_int g_raid3_ndisks(struct g_raid3_softc *sc, int state);
int g_raid3_destroy(struct g_raid3_softc *sc, boolean_t force);
int g_raid3_event_send(void *arg, int state, int flags);
struct g_raid3_metadata;
int g_raid3_add_disk(struct g_raid3_softc *sc, struct g_provider *pp,
    struct g_raid3_metadata *md);
int g_raid3_read_metadata(struct g_consumer *cp, struct g_raid3_metadata *md);
void g_raid3_fill_metadata(struct g_raid3_disk *disk,
    struct g_raid3_metadata *md);
int g_raid3_clear_metadata(struct g_raid3_disk *disk);
void g_raid3_update_metadata(struct g_raid3_disk *disk);

g_ctl_req_t g_raid3_config;
#endif /* _KERNEL */

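/*
 * On-disk metadata record.  As laid out by raid3_metadata_encode() below,
 * the current (version 3) encoded form is 120 bytes, with all multi-byte
 * fields stored little-endian:
 *
 *      offset  size    field
 *           0    16    md_magic
 *          16     4    md_version
 *          20    16    md_name
 *          36     4    md_id
 *          40     2    md_no
 *          42     2    md_all
 *          44     4    md_genid
 *          48     4    md_syncid
 *          52     8    md_mediasize
 *          60     4    md_sectorsize
 *          64     8    md_sync_offset
 *          72     8    md_mflags
 *          80     8    md_dflags
 *          88    16    md_provider
 *         104    16    md_hash (MD5 of the preceding 104 bytes)
 */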
struct g_raid3_metadata {
        char            md_magic[16];   /* Magic value. */
        uint32_t        md_version;     /* Version number. */
        char            md_name[16];    /* Device name. */
        uint32_t        md_id;          /* Device unique ID. */
        uint16_t        md_no;          /* Component number. */
        uint16_t        md_all;         /* Number of disks in device. */
        uint32_t        md_genid;       /* Generation ID. */
        uint32_t        md_syncid;      /* Synchronization ID. */
        uint64_t        md_mediasize;   /* Size of whole device. */
        uint32_t        md_sectorsize;  /* Sector size. */
        uint64_t        md_sync_offset; /* Synchronized offset. */
        uint64_t        md_mflags;      /* Additional device flags. */
        uint64_t        md_dflags;      /* Additional disk flags. */
        char            md_provider[16]; /* Hardcoded provider. */
        u_char          md_hash[16];    /* MD5 hash. */
};
static __inline void
raid3_metadata_encode(struct g_raid3_metadata *md, u_char *data)
{
        MD5_CTX ctx;

        bcopy(md->md_magic, data, 16);
        le32enc(data + 16, md->md_version);
        bcopy(md->md_name, data + 20, 16);
        le32enc(data + 36, md->md_id);
        le16enc(data + 40, md->md_no);
        le16enc(data + 42, md->md_all);
        le32enc(data + 44, md->md_genid);
        le32enc(data + 48, md->md_syncid);
        le64enc(data + 52, md->md_mediasize);
        le32enc(data + 60, md->md_sectorsize);
        le64enc(data + 64, md->md_sync_offset);
        le64enc(data + 72, md->md_mflags);
        le64enc(data + 80, md->md_dflags);
        bcopy(md->md_provider, data + 88, 16);
        MD5Init(&ctx);
        MD5Update(&ctx, data, 104);
        MD5Final(md->md_hash, &ctx);
        bcopy(md->md_hash, data + 104, 16);
}
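/*
 * Note: metadata versions 0-2 have no md_genid field, so every field after
 * md_all sits four bytes earlier than in the version 3 layout and the MD5
 * hash covers (and is stored at) offset 100 instead of 104.
 */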
static __inline int
raid3_metadata_decode_v0v1v2(const u_char *data, struct g_raid3_metadata *md)
{
        MD5_CTX ctx;

        bcopy(data + 20, md->md_name, 16);
        md->md_id = le32dec(data + 36);
        md->md_no = le16dec(data + 40);
        md->md_all = le16dec(data + 42);
        md->md_syncid = le32dec(data + 44);
        md->md_mediasize = le64dec(data + 48);
        md->md_sectorsize = le32dec(data + 56);
        md->md_sync_offset = le64dec(data + 60);
        md->md_mflags = le64dec(data + 68);
        md->md_dflags = le64dec(data + 76);
        bcopy(data + 84, md->md_provider, 16);
        bcopy(data + 100, md->md_hash, 16);
        MD5Init(&ctx);
        MD5Update(&ctx, data, 100);
        MD5Final(md->md_hash, &ctx);
        if (bcmp(md->md_hash, data + 100, 16) != 0)
                return (EINVAL);
        return (0);
}
static __inline int
raid3_metadata_decode_v3(const u_char *data, struct g_raid3_metadata *md)
{
        MD5_CTX ctx;

        bcopy(data + 20, md->md_name, 16);
        md->md_id = le32dec(data + 36);
        md->md_no = le16dec(data + 40);
        md->md_all = le16dec(data + 42);
        md->md_genid = le32dec(data + 44);
        md->md_syncid = le32dec(data + 48);
        md->md_mediasize = le64dec(data + 52);
        md->md_sectorsize = le32dec(data + 60);
        md->md_sync_offset = le64dec(data + 64);
        md->md_mflags = le64dec(data + 72);
        md->md_dflags = le64dec(data + 80);
        bcopy(data + 88, md->md_provider, 16);
        bcopy(data + 104, md->md_hash, 16);
        MD5Init(&ctx);
        MD5Update(&ctx, data, 104);
        MD5Final(md->md_hash, &ctx);
        if (bcmp(md->md_hash, data + 104, 16) != 0)
                return (EINVAL);
        return (0);
}
static __inline int
raid3_metadata_decode(const u_char *data, struct g_raid3_metadata *md)
{
        int error;

        bcopy(data, md->md_magic, 16);
        md->md_version = le32dec(data + 16);
        switch (md->md_version) {
        case 0:
        case 1:
        case 2:
                error = raid3_metadata_decode_v0v1v2(data, md);
                break;
        case 3:
                error = raid3_metadata_decode_v3(data, md);
                break;
        default:
                error = EINVAL;
                break;
        }
        return (error);
}
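
/*
 * Example round trip (an illustrative sketch, not code used by the class;
 * the variable names are placeholders and the buffer is assumed to be large
 * enough for the 120-byte encoded record):
 *
 *      struct g_raid3_metadata md_out, md_in;
 *      u_char sector[512];
 *      int error;
 *
 *      bzero(sector, sizeof(sector));
 *      raid3_metadata_encode(&md_out, sector);
 *      error = raid3_metadata_decode(sector, &md_in);
 *
 * raid3_metadata_encode() also (re)computes md_out.md_hash as a side effect.
 * raid3_metadata_decode() returns EINVAL for an unknown md_version or an MD5
 * mismatch and 0 on success; raid3_metadata_dump() below pretty-prints a
 * decoded record.
 */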

static __inline void
raid3_metadata_dump(const struct g_raid3_metadata *md)
{
        static const char hex[] = "0123456789abcdef";
        char hash[16 * 2 + 1];
        u_int i;

        printf("     magic: %s\n", md->md_magic);
        printf("   version: %u\n", (u_int)md->md_version);
        printf("      name: %s\n", md->md_name);
        printf("        id: %u\n", (u_int)md->md_id);
        printf("        no: %u\n", (u_int)md->md_no);
        printf("       all: %u\n", (u_int)md->md_all);
        printf("     genid: %u\n", (u_int)md->md_genid);
        printf("    syncid: %u\n", (u_int)md->md_syncid);
        printf(" mediasize: %jd\n", (intmax_t)md->md_mediasize);
        printf("sectorsize: %u\n", (u_int)md->md_sectorsize);
        printf("syncoffset: %jd\n", (intmax_t)md->md_sync_offset);
        printf("    mflags:");
        if (md->md_mflags == 0)
                printf(" NONE");
        else {
                if ((md->md_mflags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0)
                        printf(" NOAUTOSYNC");
                if ((md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0)
                        printf(" ROUND-ROBIN");
                if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0)
                        printf(" VERIFY");
        }
        printf("\n");
        printf("    dflags:");
        if (md->md_dflags == 0)
                printf(" NONE");
        else {
                if ((md->md_dflags & G_RAID3_DISK_FLAG_DIRTY) != 0)
                        printf(" DIRTY");
                if ((md->md_dflags & G_RAID3_DISK_FLAG_SYNCHRONIZING) != 0)
                        printf(" SYNCHRONIZING");
                if ((md->md_dflags & G_RAID3_DISK_FLAG_FORCE_SYNC) != 0)
                        printf(" FORCE_SYNC");
        }
        printf("\n");
        printf("hcprovider: %s\n", md->md_provider);
        bzero(hash, sizeof(hash));
        for (i = 0; i < 16; i++) {
                hash[i * 2] = hex[md->md_hash[i] >> 4];
                hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f];
        }
        printf("  MD5 hash: %s\n", hash);
}
#endif /* !_G_RAID3_H_ */