1 /*- 2 * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #ifndef _G_JOURNAL_H_ 30 #define _G_JOURNAL_H_ 31 32 #include <sys/endian.h> 33 #include <sys/md5.h> 34 #ifdef _KERNEL 35 #include <sys/bio.h> 36 #endif 37 38 #define G_JOURNAL_CLASS_NAME "JOURNAL" 39 40 #define G_JOURNAL_MAGIC "GEOM::JOURNAL" 41 /* 42 * Version history: 43 * 0 - Initial version number. 44 */ 45 #define G_JOURNAL_VERSION 0 46 47 #ifdef _KERNEL 48 extern int g_journal_debug; 49 50 #define GJ_DEBUG(lvl, ...) do { \ 51 if (g_journal_debug >= (lvl)) { \ 52 printf("GEOM_JOURNAL"); \ 53 if (g_journal_debug > 0) \ 54 printf("[%u]", lvl); \ 55 printf(": "); \ 56 printf(__VA_ARGS__); \ 57 printf("\n"); \ 58 } \ 59 } while (0) 60 #define GJ_LOGREQ(lvl, bp, ...) do { \ 61 if (g_journal_debug >= (lvl)) { \ 62 printf("GEOM_JOURNAL"); \ 63 if (g_journal_debug > 0) \ 64 printf("[%u]", lvl); \ 65 printf(": "); \ 66 printf(__VA_ARGS__); \ 67 printf(" "); \ 68 g_print_bio(bp); \ 69 printf("\n"); \ 70 } \ 71 } while (0) 72 73 #define JEMPTY(sc) ((sc)->sc_journal_offset - \ 74 (sc)->sc_jprovider->sectorsize == \ 75 (sc)->sc_active.jj_offset && \ 76 (sc)->sc_current_count == 0) 77 78 #define GJ_BIO_REGULAR 0x00 79 #define GJ_BIO_READ 0x01 80 #define GJ_BIO_JOURNAL 0x02 81 #define GJ_BIO_COPY 0x03 82 #define GJ_BIO_MASK 0x0f 83 84 #if 0 85 #define GJF_BIO_DONT_FREE 0x10 86 #define GJF_BIO_MASK 0xf0 87 #endif 88 89 #define GJF_DEVICE_HARDCODED 0x0001 90 #define GJF_DEVICE_DESTROY 0x0010 91 #define GJF_DEVICE_SWITCH 0x0020 92 #define GJF_DEVICE_BEFORE_SWITCH 0x0040 93 #define GJF_DEVICE_CLEAN 0x0080 94 #define GJF_DEVICE_CHECKSUM 0x0100 95 96 #define GJ_HARD_LIMIT 64 97 98 /* 99 * We keep pointers to journaled data in bio structure and because we 100 * need to store two off_t values (offset in data provider and offset in 101 * journal), we have to borrow bio_completed field for this. 102 */ 103 #define bio_joffset bio_completed 104 /* 105 * Use bio_caller1 field as a pointer in queue. 106 */ 107 #define bio_next bio_caller1 108 109 /* 110 * There are two such structures maintained inside each journaled device. 111 * One describes active part of the journal, were recent requests are stored. 112 * The second describes the last consistent part of the journal with requests 113 * that are copied to the destination provider. 114 */ 115 struct g_journal_journal { 116 struct bio *jj_queue; /* Cached journal entries. */ 117 off_t jj_offset; /* Journal's start offset. */ 118 }; 119 120 struct g_journal_softc { 121 uint32_t sc_id; 122 uint8_t sc_type; 123 uint8_t sc_orig_type; 124 struct g_geom *sc_geom; 125 u_int sc_flags; 126 struct mtx sc_mtx; 127 off_t sc_mediasize; 128 u_int sc_sectorsize; 129 #define GJ_FLUSH_DATA 0x01 130 #define GJ_FLUSH_JOURNAL 0x02 131 u_int sc_bio_flush; 132 133 uint32_t sc_journal_id; 134 uint32_t sc_journal_next_id; 135 int sc_journal_copying; 136 off_t sc_journal_offset; 137 off_t sc_journal_previous_id; 138 139 struct bio_queue_head sc_back_queue; 140 struct bio_queue_head sc_regular_queue; 141 142 struct bio_queue_head sc_delayed_queue; 143 int sc_delayed_count; 144 145 struct bio *sc_current_queue; 146 int sc_current_count; 147 148 struct bio *sc_flush_queue; 149 int sc_flush_count; 150 int sc_flush_in_progress; 151 152 struct bio *sc_copy_queue; 153 int sc_copy_in_progress; 154 155 struct g_consumer *sc_dconsumer; 156 struct g_consumer *sc_jconsumer; 157 158 struct g_journal_journal sc_inactive; 159 struct g_journal_journal sc_active; 160 161 off_t sc_jstart; /* Journal space start offset. */ 162 off_t sc_jend; /* Journal space end offset. */ 163 164 struct callout sc_callout; 165 struct proc *sc_worker; 166 167 struct root_hold_token *sc_rootmount; 168 }; 169 #define sc_dprovider sc_dconsumer->provider 170 #define sc_jprovider sc_jconsumer->provider 171 #define sc_name sc_dprovider->name 172 173 #define GJQ_INSERT_HEAD(head, bp) do { \ 174 (bp)->bio_next = (head); \ 175 (head) = (bp); \ 176 } while (0) 177 #define GJQ_INSERT_AFTER(head, bp, pbp) do { \ 178 if ((pbp) == NULL) \ 179 GJQ_INSERT_HEAD(head, bp); \ 180 else { \ 181 (bp)->bio_next = (pbp)->bio_next; \ 182 (pbp)->bio_next = (bp); \ 183 } \ 184 } while (0) 185 #define GJQ_LAST(head, bp) do { \ 186 struct bio *_bp; \ 187 \ 188 if ((head) == NULL) { \ 189 (bp) = (head); \ 190 break; \ 191 } \ 192 for (_bp = (head); _bp->bio_next != NULL; _bp = _bp->bio_next) \ 193 continue; \ 194 (bp) = (_bp); \ 195 } while (0) 196 #define GJQ_FIRST(head) (head) 197 #define GJQ_REMOVE(head, bp) do { \ 198 struct bio *_bp; \ 199 \ 200 if ((head) == (bp)) { \ 201 (head) = (bp)->bio_next; \ 202 (bp)->bio_next = NULL; \ 203 break; \ 204 } \ 205 for (_bp = (head); _bp->bio_next != NULL; _bp = _bp->bio_next) {\ 206 if (_bp->bio_next == (bp)) \ 207 break; \ 208 } \ 209 KASSERT(_bp->bio_next != NULL, ("NULL bio_next")); \ 210 KASSERT(_bp->bio_next == (bp), ("bio_next != bp")); \ 211 _bp->bio_next = (bp)->bio_next; \ 212 (bp)->bio_next = NULL; \ 213 } while (0) 214 #define GJQ_FOREACH(head, bp) \ 215 for ((bp) = (head); (bp) != NULL; (bp) = (bp)->bio_next) 216 217 #define GJ_HEADER_MAGIC "GJHDR" 218 219 struct g_journal_header { 220 char jh_magic[sizeof(GJ_HEADER_MAGIC)]; 221 uint32_t jh_journal_id; 222 uint32_t jh_journal_next_id; 223 } __packed; 224 225 struct g_journal_entry { 226 uint64_t je_joffset; 227 uint64_t je_offset; 228 uint64_t je_length; 229 } __packed; 230 231 #define GJ_RECORD_HEADER_MAGIC "GJRHDR" 232 #define GJ_RECORD_HEADER_NENTRIES (20) 233 #define GJ_RECORD_MAX_SIZE(sc) \ 234 ((sc)->sc_jprovider->sectorsize + GJ_RECORD_HEADER_NENTRIES * MAXPHYS) 235 #define GJ_VALIDATE_OFFSET(offset, sc) do { \ 236 if ((offset) + GJ_RECORD_MAX_SIZE(sc) >= (sc)->sc_jend) { \ 237 (offset) = (sc)->sc_jstart; \ 238 GJ_DEBUG(2, "Starting from the beginning (%s).", \ 239 (sc)->sc_name); \ 240 } \ 241 } while (0) 242 243 struct g_journal_record_header { 244 char jrh_magic[sizeof(GJ_RECORD_HEADER_MAGIC)]; 245 uint32_t jrh_journal_id; 246 uint16_t jrh_nentries; 247 u_char jrh_sum[8]; 248 struct g_journal_entry jrh_entries[GJ_RECORD_HEADER_NENTRIES]; 249 } __packed; 250 251 typedef int (g_journal_clean_t)(struct mount *mp); 252 typedef void (g_journal_dirty_t)(struct g_consumer *cp); 253 254 struct g_journal_desc { 255 const char *jd_fstype; 256 g_journal_clean_t *jd_clean; 257 g_journal_dirty_t *jd_dirty; 258 }; 259 260 /* Supported file systems. */ 261 extern const struct g_journal_desc g_journal_ufs; 262 263 #define GJ_TIMER_START(lvl, bt) do { \ 264 if (g_journal_debug >= (lvl)) \ 265 binuptime(bt); \ 266 } while (0) 267 #define GJ_TIMER_STOP(lvl, bt, ...) do { \ 268 if (g_journal_debug >= (lvl)) { \ 269 struct bintime _bt2; \ 270 struct timeval _tv; \ 271 \ 272 binuptime(&_bt2); \ 273 bintime_sub(&_bt2, bt); \ 274 bintime2timeval(&_bt2, &_tv); \ 275 printf("GEOM_JOURNAL"); \ 276 if (g_journal_debug > 0) \ 277 printf("[%u]", lvl); \ 278 printf(": "); \ 279 printf(__VA_ARGS__); \ 280 printf(": %jd.%06jds\n", (intmax_t)_tv.tv_sec, \ 281 (intmax_t)_tv.tv_usec); \ 282 } \ 283 } while (0) 284 #endif /* _KERNEL */ 285 286 #define GJ_TYPE_DATA 0x01 287 #define GJ_TYPE_JOURNAL 0x02 288 #define GJ_TYPE_COMPLETE (GJ_TYPE_DATA|GJ_TYPE_JOURNAL) 289 290 #define GJ_FLAG_CLEAN 0x01 291 #define GJ_FLAG_CHECKSUM 0x02 292 293 struct g_journal_metadata { 294 char md_magic[16]; /* Magic value. */ 295 uint32_t md_version; /* Version number. */ 296 uint32_t md_id; /* Journal unique ID. */ 297 uint8_t md_type; /* Provider type. */ 298 uint64_t md_jstart; /* Journal space start offset. */ 299 uint64_t md_jend; /* Journal space end offset. */ 300 uint64_t md_joffset; /* Last known consistent journal offset. */ 301 uint32_t md_jid; /* Last known consistent journal ID. */ 302 uint64_t md_flags; /* Journal flags. */ 303 char md_provider[16]; /* Hardcoded provider. */ 304 uint64_t md_provsize; /* Provider's size. */ 305 u_char md_hash[16]; /* MD5 hash. */ 306 }; 307 static __inline void 308 journal_metadata_encode(struct g_journal_metadata *md, u_char *data) 309 { 310 MD5_CTX ctx; 311 312 bcopy(md->md_magic, data, 16); 313 le32enc(data + 16, md->md_version); 314 le32enc(data + 20, md->md_id); 315 *(data + 24) = md->md_type; 316 le64enc(data + 25, md->md_jstart); 317 le64enc(data + 33, md->md_jend); 318 le64enc(data + 41, md->md_joffset); 319 le32enc(data + 49, md->md_jid); 320 le64enc(data + 53, md->md_flags); 321 bcopy(md->md_provider, data + 61, 16); 322 le64enc(data + 77, md->md_provsize); 323 MD5Init(&ctx); 324 MD5Update(&ctx, data, 85); 325 MD5Final(md->md_hash, &ctx); 326 bcopy(md->md_hash, data + 85, 16); 327 } 328 static __inline int 329 journal_metadata_decode_v0(const u_char *data, struct g_journal_metadata *md) 330 { 331 MD5_CTX ctx; 332 333 md->md_id = le32dec(data + 20); 334 md->md_type = *(data + 24); 335 md->md_jstart = le64dec(data + 25); 336 md->md_jend = le64dec(data + 33); 337 md->md_joffset = le64dec(data + 41); 338 md->md_jid = le32dec(data + 49); 339 md->md_flags = le64dec(data + 53); 340 bcopy(data + 61, md->md_provider, 16); 341 md->md_provsize = le64dec(data + 77); 342 MD5Init(&ctx); 343 MD5Update(&ctx, data, 85); 344 MD5Final(md->md_hash, &ctx); 345 if (bcmp(md->md_hash, data + 85, 16) != 0) 346 return (EINVAL); 347 return (0); 348 } 349 static __inline int 350 journal_metadata_decode(const u_char *data, struct g_journal_metadata *md) 351 { 352 int error; 353 354 bcopy(data, md->md_magic, 16); 355 md->md_version = le32dec(data + 16); 356 switch (md->md_version) { 357 case 0: 358 error = journal_metadata_decode_v0(data, md); 359 break; 360 default: 361 error = EINVAL; 362 break; 363 } 364 return (error); 365 } 366 367 static __inline void 368 journal_metadata_dump(const struct g_journal_metadata *md) 369 { 370 static const char hex[] = "0123456789abcdef"; 371 char hash[16 * 2 + 1]; 372 u_int i; 373 374 printf(" magic: %s\n", md->md_magic); 375 printf(" version: %u\n", (u_int)md->md_version); 376 printf(" id: %u\n", (u_int)md->md_id); 377 printf(" type: %u\n", (u_int)md->md_type); 378 printf(" start: %ju\n", (uintmax_t)md->md_jstart); 379 printf(" end: %ju\n", (uintmax_t)md->md_jend); 380 printf(" joffset: %ju\n", (uintmax_t)md->md_joffset); 381 printf(" jid: %u\n", (u_int)md->md_jid); 382 printf(" flags: %u\n", (u_int)md->md_flags); 383 printf("hcprovider: %s\n", md->md_provider); 384 printf(" provsize: %ju\n", (uintmax_t)md->md_provsize); 385 bzero(hash, sizeof(hash)); 386 for (i = 0; i < 16; i++) { 387 hash[i * 2] = hex[md->md_hash[i] >> 4]; 388 hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f]; 389 } 390 printf(" MD5 hash: %s\n", hash); 391 } 392 #endif /* !_G_JOURNAL_H_ */ 393