1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #ifndef _G_JOURNAL_H_ 32 #define _G_JOURNAL_H_ 33 34 #include <sys/endian.h> 35 #include <sys/md5.h> 36 #ifdef _KERNEL 37 #include <sys/bio.h> 38 #endif 39 40 #define G_JOURNAL_CLASS_NAME "JOURNAL" 41 42 #define G_JOURNAL_MAGIC "GEOM::JOURNAL" 43 /* 44 * Version history: 45 * 0 - Initial version number. 46 */ 47 #define G_JOURNAL_VERSION 0 48 49 #ifdef _KERNEL 50 extern int g_journal_debug; 51 52 #define GJ_DEBUG(lvl, ...) do { \ 53 if (g_journal_debug >= (lvl)) { \ 54 printf("GEOM_JOURNAL"); \ 55 if (g_journal_debug > 0) \ 56 printf("[%u]", lvl); \ 57 printf(": "); \ 58 printf(__VA_ARGS__); \ 59 printf("\n"); \ 60 } \ 61 } while (0) 62 #define GJ_LOGREQ(lvl, bp, ...) do { \ 63 if (g_journal_debug >= (lvl)) { \ 64 printf("GEOM_JOURNAL"); \ 65 if (g_journal_debug > 0) \ 66 printf("[%u]", lvl); \ 67 printf(": "); \ 68 printf(__VA_ARGS__); \ 69 printf(" "); \ 70 g_print_bio(bp); \ 71 printf("\n"); \ 72 } \ 73 } while (0) 74 75 #define JEMPTY(sc) ((sc)->sc_journal_offset - \ 76 (sc)->sc_jprovider->sectorsize == \ 77 (sc)->sc_active.jj_offset && \ 78 (sc)->sc_current_count == 0) 79 80 #define GJ_BIO_REGULAR 0x00 81 #define GJ_BIO_READ 0x01 82 #define GJ_BIO_JOURNAL 0x02 83 #define GJ_BIO_COPY 0x03 84 #define GJ_BIO_MASK 0x0f 85 86 #if 0 87 #define GJF_BIO_DONT_FREE 0x10 88 #define GJF_BIO_MASK 0xf0 89 #endif 90 91 #define GJF_DEVICE_HARDCODED 0x0001 92 #define GJF_DEVICE_DESTROY 0x0010 93 #define GJF_DEVICE_SWITCH 0x0020 94 #define GJF_DEVICE_BEFORE_SWITCH 0x0040 95 #define GJF_DEVICE_CLEAN 0x0080 96 #define GJF_DEVICE_CHECKSUM 0x0100 97 98 #define GJ_HARD_LIMIT 64 99 100 /* 101 * We keep pointers to journaled data in bio structure and because we 102 * need to store two off_t values (offset in data provider and offset in 103 * journal), we have to borrow bio_completed field for this. 104 */ 105 #define bio_joffset bio_completed 106 /* 107 * Use bio_caller1 field as a pointer in queue. 108 */ 109 #define bio_next bio_caller1 110 111 /* 112 * There are two such structures maintained inside each journaled device. 113 * One describes active part of the journal, were recent requests are stored. 114 * The second describes the last consistent part of the journal with requests 115 * that are copied to the destination provider. 116 */ 117 struct g_journal_journal { 118 struct bio *jj_queue; /* Cached journal entries. */ 119 off_t jj_offset; /* Journal's start offset. */ 120 }; 121 122 struct g_journal_softc { 123 uint32_t sc_id; 124 uint8_t sc_type; 125 uint8_t sc_orig_type; 126 struct g_geom *sc_geom; 127 u_int sc_flags; 128 struct mtx sc_mtx; 129 off_t sc_mediasize; 130 u_int sc_sectorsize; 131 #define GJ_FLUSH_DATA 0x01 132 #define GJ_FLUSH_JOURNAL 0x02 133 u_int sc_bio_flush; 134 135 uint32_t sc_journal_id; 136 uint32_t sc_journal_next_id; 137 int sc_journal_copying; 138 off_t sc_journal_offset; 139 off_t sc_journal_previous_id; 140 141 struct bio_queue_head sc_back_queue; 142 struct bio_queue_head sc_regular_queue; 143 144 struct bio_queue_head sc_delayed_queue; 145 int sc_delayed_count; 146 147 struct bio *sc_current_queue; 148 int sc_current_count; 149 150 struct bio *sc_flush_queue; 151 int sc_flush_count; 152 int sc_flush_in_progress; 153 154 struct bio *sc_copy_queue; 155 int sc_copy_in_progress; 156 157 struct g_consumer *sc_dconsumer; 158 struct g_consumer *sc_jconsumer; 159 160 struct g_journal_journal sc_inactive; 161 struct g_journal_journal sc_active; 162 163 off_t sc_jstart; /* Journal space start offset. */ 164 off_t sc_jend; /* Journal space end offset. */ 165 166 struct callout sc_callout; 167 struct proc *sc_worker; 168 169 struct root_hold_token *sc_rootmount; 170 }; 171 #define sc_dprovider sc_dconsumer->provider 172 #define sc_jprovider sc_jconsumer->provider 173 #define sc_name sc_dprovider->name 174 175 #define GJQ_INSERT_HEAD(head, bp) do { \ 176 (bp)->bio_next = (head); \ 177 (head) = (bp); \ 178 } while (0) 179 #define GJQ_INSERT_AFTER(head, bp, pbp) do { \ 180 if ((pbp) == NULL) \ 181 GJQ_INSERT_HEAD(head, bp); \ 182 else { \ 183 (bp)->bio_next = (pbp)->bio_next; \ 184 (pbp)->bio_next = (bp); \ 185 } \ 186 } while (0) 187 #define GJQ_LAST(head, bp) do { \ 188 struct bio *_bp; \ 189 \ 190 if ((head) == NULL) { \ 191 (bp) = (head); \ 192 break; \ 193 } \ 194 for (_bp = (head); _bp->bio_next != NULL; _bp = _bp->bio_next) \ 195 continue; \ 196 (bp) = (_bp); \ 197 } while (0) 198 #define GJQ_FIRST(head) (head) 199 #define GJQ_REMOVE(head, bp) do { \ 200 struct bio *_bp; \ 201 \ 202 if ((head) == (bp)) { \ 203 (head) = (bp)->bio_next; \ 204 (bp)->bio_next = NULL; \ 205 break; \ 206 } \ 207 for (_bp = (head); _bp->bio_next != NULL; _bp = _bp->bio_next) {\ 208 if (_bp->bio_next == (bp)) \ 209 break; \ 210 } \ 211 KASSERT(_bp->bio_next != NULL, ("NULL bio_next")); \ 212 KASSERT(_bp->bio_next == (bp), ("bio_next != bp")); \ 213 _bp->bio_next = (bp)->bio_next; \ 214 (bp)->bio_next = NULL; \ 215 } while (0) 216 #define GJQ_FOREACH(head, bp) \ 217 for ((bp) = (head); (bp) != NULL; (bp) = (bp)->bio_next) 218 219 #define GJ_HEADER_MAGIC "GJHDR" 220 221 struct g_journal_header { 222 char jh_magic[sizeof(GJ_HEADER_MAGIC)]; 223 uint32_t jh_journal_id; 224 uint32_t jh_journal_next_id; 225 } __packed; 226 227 struct g_journal_entry { 228 uint64_t je_joffset; 229 uint64_t je_offset; 230 uint64_t je_length; 231 } __packed; 232 233 #define GJ_RECORD_HEADER_MAGIC "GJRHDR" 234 #define GJ_RECORD_HEADER_NENTRIES (20) 235 #define GJ_RECORD_MAX_SIZE(sc) \ 236 ((sc)->sc_jprovider->sectorsize + GJ_RECORD_HEADER_NENTRIES * MAXPHYS) 237 #define GJ_VALIDATE_OFFSET(offset, sc) do { \ 238 if ((offset) + GJ_RECORD_MAX_SIZE(sc) >= (sc)->sc_jend) { \ 239 (offset) = (sc)->sc_jstart; \ 240 GJ_DEBUG(2, "Starting from the beginning (%s).", \ 241 (sc)->sc_name); \ 242 } \ 243 } while (0) 244 245 struct g_journal_record_header { 246 char jrh_magic[sizeof(GJ_RECORD_HEADER_MAGIC)]; 247 uint32_t jrh_journal_id; 248 uint16_t jrh_nentries; 249 u_char jrh_sum[8]; 250 struct g_journal_entry jrh_entries[GJ_RECORD_HEADER_NENTRIES]; 251 } __packed; 252 253 typedef int (g_journal_clean_t)(struct mount *mp); 254 typedef void (g_journal_dirty_t)(struct g_consumer *cp); 255 256 struct g_journal_desc { 257 const char *jd_fstype; 258 g_journal_clean_t *jd_clean; 259 g_journal_dirty_t *jd_dirty; 260 }; 261 262 /* Supported file systems. */ 263 extern const struct g_journal_desc g_journal_ufs; 264 265 #define GJ_TIMER_START(lvl, bt) do { \ 266 if (g_journal_debug >= (lvl)) \ 267 binuptime(bt); \ 268 } while (0) 269 #define GJ_TIMER_STOP(lvl, bt, ...) do { \ 270 if (g_journal_debug >= (lvl)) { \ 271 struct bintime _bt2; \ 272 struct timeval _tv; \ 273 \ 274 binuptime(&_bt2); \ 275 bintime_sub(&_bt2, bt); \ 276 bintime2timeval(&_bt2, &_tv); \ 277 printf("GEOM_JOURNAL"); \ 278 if (g_journal_debug > 0) \ 279 printf("[%u]", lvl); \ 280 printf(": "); \ 281 printf(__VA_ARGS__); \ 282 printf(": %jd.%06jds\n", (intmax_t)_tv.tv_sec, \ 283 (intmax_t)_tv.tv_usec); \ 284 } \ 285 } while (0) 286 #endif /* _KERNEL */ 287 288 #define GJ_TYPE_DATA 0x01 289 #define GJ_TYPE_JOURNAL 0x02 290 #define GJ_TYPE_COMPLETE (GJ_TYPE_DATA|GJ_TYPE_JOURNAL) 291 292 #define GJ_FLAG_CLEAN 0x01 293 #define GJ_FLAG_CHECKSUM 0x02 294 295 struct g_journal_metadata { 296 char md_magic[16]; /* Magic value. */ 297 uint32_t md_version; /* Version number. */ 298 uint32_t md_id; /* Journal unique ID. */ 299 uint8_t md_type; /* Provider type. */ 300 uint64_t md_jstart; /* Journal space start offset. */ 301 uint64_t md_jend; /* Journal space end offset. */ 302 uint64_t md_joffset; /* Last known consistent journal offset. */ 303 uint32_t md_jid; /* Last known consistent journal ID. */ 304 uint64_t md_flags; /* Journal flags. */ 305 char md_provider[16]; /* Hardcoded provider. */ 306 uint64_t md_provsize; /* Provider's size. */ 307 u_char md_hash[16]; /* MD5 hash. */ 308 }; 309 static __inline void 310 journal_metadata_encode(struct g_journal_metadata *md, u_char *data) 311 { 312 MD5_CTX ctx; 313 314 bcopy(md->md_magic, data, 16); 315 le32enc(data + 16, md->md_version); 316 le32enc(data + 20, md->md_id); 317 *(data + 24) = md->md_type; 318 le64enc(data + 25, md->md_jstart); 319 le64enc(data + 33, md->md_jend); 320 le64enc(data + 41, md->md_joffset); 321 le32enc(data + 49, md->md_jid); 322 le64enc(data + 53, md->md_flags); 323 bcopy(md->md_provider, data + 61, 16); 324 le64enc(data + 77, md->md_provsize); 325 MD5Init(&ctx); 326 MD5Update(&ctx, data, 85); 327 MD5Final(md->md_hash, &ctx); 328 bcopy(md->md_hash, data + 85, 16); 329 } 330 static __inline int 331 journal_metadata_decode_v0(const u_char *data, struct g_journal_metadata *md) 332 { 333 MD5_CTX ctx; 334 335 md->md_id = le32dec(data + 20); 336 md->md_type = *(data + 24); 337 md->md_jstart = le64dec(data + 25); 338 md->md_jend = le64dec(data + 33); 339 md->md_joffset = le64dec(data + 41); 340 md->md_jid = le32dec(data + 49); 341 md->md_flags = le64dec(data + 53); 342 bcopy(data + 61, md->md_provider, 16); 343 md->md_provsize = le64dec(data + 77); 344 MD5Init(&ctx); 345 MD5Update(&ctx, data, 85); 346 MD5Final(md->md_hash, &ctx); 347 if (bcmp(md->md_hash, data + 85, 16) != 0) 348 return (EINVAL); 349 return (0); 350 } 351 static __inline int 352 journal_metadata_decode(const u_char *data, struct g_journal_metadata *md) 353 { 354 int error; 355 356 bcopy(data, md->md_magic, 16); 357 md->md_version = le32dec(data + 16); 358 switch (md->md_version) { 359 case 0: 360 error = journal_metadata_decode_v0(data, md); 361 break; 362 default: 363 error = EINVAL; 364 break; 365 } 366 return (error); 367 } 368 369 static __inline void 370 journal_metadata_dump(const struct g_journal_metadata *md) 371 { 372 static const char hex[] = "0123456789abcdef"; 373 char hash[16 * 2 + 1]; 374 u_int i; 375 376 printf(" magic: %s\n", md->md_magic); 377 printf(" version: %u\n", (u_int)md->md_version); 378 printf(" id: %u\n", (u_int)md->md_id); 379 printf(" type: %u\n", (u_int)md->md_type); 380 printf(" start: %ju\n", (uintmax_t)md->md_jstart); 381 printf(" end: %ju\n", (uintmax_t)md->md_jend); 382 printf(" joffset: %ju\n", (uintmax_t)md->md_joffset); 383 printf(" jid: %u\n", (u_int)md->md_jid); 384 printf(" flags: %u\n", (u_int)md->md_flags); 385 printf("hcprovider: %s\n", md->md_provider); 386 printf(" provsize: %ju\n", (uintmax_t)md->md_provsize); 387 bzero(hash, sizeof(hash)); 388 for (i = 0; i < 16; i++) { 389 hash[i * 2] = hex[md->md_hash[i] >> 4]; 390 hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f]; 391 } 392 printf(" MD5 hash: %s\n", hash); 393 } 394 #endif /* !_G_JOURNAL_H_ */ 395