xref: /freebsd/sys/geom/journal/g_journal.h (revision b9f654b163bce26de79705e77b872427c9f2afa1)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 #ifndef	_G_JOURNAL_H_
32 #define	_G_JOURNAL_H_
33 
34 #include <sys/endian.h>
35 #include <sys/md5.h>
36 #ifdef _KERNEL
37 #include <sys/bio.h>
38 #endif
39 
/* GEOM class name string. */
40 #define	G_JOURNAL_CLASS_NAME	"JOURNAL"
41 
/*
 * Magic string for journal metadata.
 * NOTE(review): presumably the value md_magic is expected to hold; the
 * comparison itself is not in this header -- confirm against callers.
 */
42 #define	G_JOURNAL_MAGIC		"GEOM::JOURNAL"
43 /*
44  * Version history:
45  * 0 - Initial version number.
46  */
/* journal_metadata_decode() rejects any version it has no decoder for. */
47 #define	G_JOURNAL_VERSION	0
48 
49 #ifdef _KERNEL
/*
 * Debug verbosity.  The GJ_DEBUG/GJ_LOGREQ/GJ_TIMER_* macros emit output
 * only when this value is at least the level passed to them.
 */
50 extern int g_journal_debug;
51 
/*
 * Print a debug message when g_journal_debug is at least 'lvl'.
 *
 * Output is "GEOM_JOURNAL", then "[lvl]" whenever g_journal_debug is
 * positive, then ": ", the printf-style message built from the remaining
 * arguments, and a newline.  'lvl' is expanded twice, so it must be free of
 * side effects.  At least a format string must follow 'lvl'.
 */
52 #define	GJ_DEBUG(lvl, ...)	do {					\
53 	if (g_journal_debug >= (lvl)) {					\
54 		printf("GEOM_JOURNAL");					\
55 		if (g_journal_debug > 0)				\
56 			printf("[%u]", lvl);				\
57 		printf(": ");						\
58 		printf(__VA_ARGS__);					\
59 		printf("\n");						\
60 	}								\
61 } while (0)
/*
 * Same as GJ_DEBUG, but the message is followed by a space and a
 * description of the request 'bp' as printed by g_print_bio(), before the
 * terminating newline.  'lvl' is expanded twice; 'bp' once.
 */
62 #define	GJ_LOGREQ(lvl, bp, ...)	do {					\
63 	if (g_journal_debug >= (lvl)) {					\
64 		printf("GEOM_JOURNAL");					\
65 		if (g_journal_debug > 0)				\
66 			printf("[%u]", lvl);				\
67 		printf(": ");						\
68 		printf(__VA_ARGS__);					\
69 		printf(" ");						\
70 		g_print_bio(bp);					\
71 		printf("\n");						\
72 	}								\
73 } while (0)
74 
/*
 * True when the journal write offset is exactly one journal-provider sector
 * past the start of the active journal and the current queue holds no
 * requests.
 * NOTE(review): the single sector is presumably the journal header written
 * at the start of each journal -- confirm in g_journal.c.
 */
75 #define	JEMPTY(sc)	((sc)->sc_journal_offset -			\
76 			 (sc)->sc_jprovider->sectorsize ==		\
77 			 (sc)->sc_active.jj_offset &&			\
78 			 (sc)->sc_current_count == 0)
79 
/*
 * Request kinds.  The values are small integers (not independent bits) and
 * GJ_BIO_MASK covers the low four bits that hold them.
 * NOTE(review): which bio field carries these is not visible here.
 */
80 #define	GJ_BIO_REGULAR		0x00
81 #define	GJ_BIO_READ		0x01
82 #define	GJ_BIO_JOURNAL		0x02
83 #define	GJ_BIO_COPY		0x03
84 #define	GJ_BIO_MASK		0x0f
85 
/* Compiled out: per-request flag bits in the high nibble. */
86 #if 0
87 #define	GJF_BIO_DONT_FREE	0x10
88 #define	GJF_BIO_MASK		0xf0
89 #endif
90 
/*
 * Device flag bits.
 * NOTE(review): presumably stored in g_journal_softc.sc_flags -- confirm.
 */
91 #define	GJF_DEVICE_HARDCODED		0x0001
92 #define	GJF_DEVICE_DESTROY		0x0010
93 #define	GJF_DEVICE_SWITCH		0x0020
94 #define	GJF_DEVICE_BEFORE_SWITCH	0x0040
95 #define	GJF_DEVICE_CLEAN		0x0080
96 #define	GJF_DEVICE_CHECKSUM		0x0100
97 
/* NOTE(review): what this limit bounds is not visible in this header. */
98 #define	GJ_HARD_LIMIT		64
99 
100 /*
101  * We keep pointers to journaled data in bio structure and because we
102  * need to store two off_t values (offset in data provider and offset in
103  * journal), we have to borrow bio_completed field for this.
104  */
105 #define	bio_joffset	bio_completed
106 /*
107  * Use bio_caller1 field as a pointer in queue.
108  */
/*
 * The GJQ_* macros in this header link requests through this alias, so a
 * request can be on at most one such queue at a time.
 */
109 #define	bio_next	bio_caller1
110 
111 /*
112  * There are two such structures maintained inside each journaled device.
113  * One describes active part of the journal, where recent requests are stored.
114  * The second describes the last consistent part of the journal with requests
115  * that are copied to the destination provider.
116  */
117 struct g_journal_journal {
118 	struct bio	*jj_queue;	/* Cached journal entries. */
119 	off_t		 jj_offset;	/* Journal's start offset. */
120 };
121 
/*
 * State of one journaled device.
 *
 * It holds the two journal descriptors (sc_active and sc_inactive), the
 * consumers attached to the underlying providers, and several request
 * queues with their counters.  The queues declared as 'struct bio *' are
 * singly-linked lists handled with the GJQ_* macros.
 * NOTE(review): field semantics beyond the inline comments (locking rules,
 * which thread owns which queue) are not visible in this header.
 */
122 struct g_journal_softc {
123 	uint32_t	 sc_id;
124 	uint8_t		 sc_type;
125 	uint8_t		 sc_orig_type;
126 	struct g_geom	*sc_geom;
127 	u_int		 sc_flags;
128 	struct mtx	 sc_mtx;
129 	off_t		 sc_mediasize;
130 	u_int		 sc_sectorsize;
/* Bits for sc_bio_flush. */
131 #define	GJ_FLUSH_DATA		0x01
132 #define	GJ_FLUSH_JOURNAL	0x02
133 	u_int		 sc_bio_flush;
134 
135 	uint32_t	 sc_journal_id;
136 	uint32_t	 sc_journal_next_id;
137 	int		 sc_journal_copying;
138 	off_t		 sc_journal_offset;
139 	off_t		 sc_journal_previous_id;
140 
141 	struct bio_queue_head sc_back_queue;
142 	struct bio_queue_head sc_regular_queue;
143 
144 	struct bio_queue_head sc_delayed_queue;
145 	int		 sc_delayed_count;
146 
147 	struct bio	*sc_current_queue;
148 	int		 sc_current_count;
149 
150 	struct bio	*sc_flush_queue;
151 	int		 sc_flush_count;
152 	int		 sc_flush_in_progress;
153 
154 	struct bio	*sc_copy_queue;
155 	int		 sc_copy_in_progress;
156 
157 	struct g_consumer *sc_dconsumer;
158 	struct g_consumer *sc_jconsumer;
159 
160 	struct g_journal_journal sc_inactive;
161 	struct g_journal_journal sc_active;
162 
163 	off_t		 sc_jstart;	/* Journal space start offset. */
164 	off_t		 sc_jend;	/* Journal space end offset. */
165 
166 	struct callout	 sc_callout;
167 	struct proc	*sc_worker;
168 
169 	struct root_hold_token *sc_rootmount;
170 };
/*
 * Shorthands meant to be used as sc->sc_dprovider, sc->sc_jprovider and
 * sc->sc_name.  They reach the provider through the corresponding consumer,
 * so that consumer pointer must be non-NULL; sc_name is the name of the
 * provider behind sc_dconsumer.
 */
171 #define	sc_dprovider	sc_dconsumer->provider
172 #define	sc_jprovider	sc_jconsumer->provider
173 #define	sc_name		sc_dprovider->name
174 
/*
 * Push 'bp' onto the front of the singly-linked list whose first element is
 * held in the lvalue 'head'; 'head' is updated to point at 'bp'.
 */
175 #define	GJQ_INSERT_HEAD(head, bp)	do {				\
176 	(bp)->bio_next = (head);					\
177 	(head) = (bp);							\
178 } while (0)
/*
 * Link 'bp' into the list immediately after 'pbp'.  When 'pbp' is NULL,
 * 'bp' is inserted at the head instead.
 */
179 #define	GJQ_INSERT_AFTER(head, bp, pbp)	do {				\
180 	if ((pbp) == NULL)						\
181 		GJQ_INSERT_HEAD(head, bp);				\
182 	else {								\
183 		(bp)->bio_next = (pbp)->bio_next;			\
184 		(pbp)->bio_next = (bp);					\
185 	}								\
186 } while (0)
/*
 * Store the last element of the list in 'bp', or NULL when the list is
 * empty.  The whole list is walked to find it.  The 'break' leaves the
 * enclosing do/while(0), i.e. the macro itself.
 */
187 #define GJQ_LAST(head, bp) do {						\
188 	struct bio *_bp;						\
189 									\
190 	if ((head) == NULL) {						\
191 		(bp) = (head);						\
192 		break;							\
193 	}								\
194 	for (_bp = (head); _bp->bio_next != NULL; _bp = _bp->bio_next)	\
195 		continue;						\
196 	(bp) = (_bp);							\
197 } while (0)
/* First element of the list; this is simply the head pointer itself. */
198 #define	GJQ_FIRST(head)	(head)
/*
 * Unlink 'bp' from the list and clear its link field.
 *
 * If 'bp' is the head, the head advances to the next element and the macro
 * finishes early (the first 'break' leaves the do/while(0)).  Otherwise the
 * list is walked to find the predecessor of 'bp'; the KASSERTs check that
 * one was found.  'bp' must therefore be on the list, and the list must not
 * be empty: the search loop dereferences 'head' without a NULL check.
 */
199 #define	GJQ_REMOVE(head, bp)	do {					\
200 	struct bio *_bp;						\
201 									\
202 	if ((head) == (bp)) {						\
203 		(head) = (bp)->bio_next;				\
204 		(bp)->bio_next = NULL;					\
205 		break;							\
206 	}								\
207 	for (_bp = (head); _bp->bio_next != NULL; _bp = _bp->bio_next) {\
208 		if (_bp->bio_next == (bp))				\
209 			break;						\
210 	}								\
211 	KASSERT(_bp->bio_next != NULL, ("NULL bio_next"));		\
212 	KASSERT(_bp->bio_next == (bp), ("bio_next != bp"));		\
213 	_bp->bio_next = (bp)->bio_next;					\
214 	(bp)->bio_next = NULL;						\
215 } while (0)
/*
 * Iterate 'bp' over every element of the list, front to back.  The link of
 * the current element is read after the loop body runs, so the body must
 * not unlink or free 'bp'.
 */
216 #define GJQ_FOREACH(head, bp)						\
217 	for ((bp) = (head); (bp) != NULL; (bp) = (bp)->bio_next)
218 
/* Magic string placed in g_journal_header.jh_magic. */
219 #define	GJ_HEADER_MAGIC	"GJHDR"
220 
/*
 * Journal header.  jh_magic is sized to hold GJ_HEADER_MAGIC including its
 * terminating NUL.  The structure is packed, so there is no padding between
 * fields.
 * NOTE(review): presumably written at the start of each journal; byte order
 * handling is not visible in this header -- confirm in g_journal.c.
 */
221 struct g_journal_header {
222 	char		jh_magic[sizeof(GJ_HEADER_MAGIC)];
223 	uint32_t	jh_journal_id;
224 	uint32_t	jh_journal_next_id;
225 } __packed;
226 
/*
 * One entry of a journal record header (see jrh_entries): three packed
 * 64-bit values.
 * NOTE(review): by name, je_joffset is presumably the offset in the journal,
 * je_offset the offset in the data provider and je_length the length of the
 * journaled data -- confirm against the code that fills them in.
 */
227 struct g_journal_entry {
228 	uint64_t	je_joffset;
229 	uint64_t	je_offset;
230 	uint64_t	je_length;
231 } __packed;
232 
/* Magic string placed in g_journal_record_header.jrh_magic. */
233 #define	GJ_RECORD_HEADER_MAGIC		"GJRHDR"
/* Number of entries a single record header can describe. */
234 #define	GJ_RECORD_HEADER_NENTRIES	(20)
/*
 * Upper bound on the size of one record: one journal-provider sector plus
 * MAXPHYS bytes for each of the GJ_RECORD_HEADER_NENTRIES entries.
 */
235 #define	GJ_RECORD_MAX_SIZE(sc)	\
236 	((sc)->sc_jprovider->sectorsize + GJ_RECORD_HEADER_NENTRIES * MAXPHYS)
/*
 * Wrap 'offset' (an lvalue) back to the start of the journal space when a
 * maximum-size record placed at 'offset' would reach or cross sc_jend.  The
 * wrap is reported at debug level 2.  'offset' and 'sc' are expanded more
 * than once.
 */
237 #define	GJ_VALIDATE_OFFSET(offset, sc)	do {				\
238 	if ((offset) + GJ_RECORD_MAX_SIZE(sc) >= (sc)->sc_jend) {	\
239 		(offset) = (sc)->sc_jstart;				\
240 		GJ_DEBUG(2, "Starting from the beginning (%s).",		\
241 		    (sc)->sc_name);					\
242 	}								\
243 } while (0)
244 
/*
 * Header of one journal record.  jrh_magic is sized to hold
 * GJ_RECORD_HEADER_MAGIC including its terminating NUL, and jrh_entries has
 * room for GJ_RECORD_HEADER_NENTRIES entries.  The structure is packed.
 * NOTE(review): jrh_nentries is presumably the number of jrh_entries slots
 * in use and jrh_sum a checksum over the record data -- confirm where the
 * header is built.
 */
245 struct g_journal_record_header {
246 	char		jrh_magic[sizeof(GJ_RECORD_HEADER_MAGIC)];
247 	uint32_t	jrh_journal_id;
248 	uint16_t	jrh_nentries;
249 	u_char		jrh_sum[8];
250 	struct g_journal_entry jrh_entries[GJ_RECORD_HEADER_NENTRIES];
251 } __packed;
252 
/*
 * Per-file-system hooks.  The "clean" hook takes a mount point and returns
 * an int; the "dirty" hook takes a consumer and returns nothing.
 * NOTE(review): the exact contract of each hook (what it modifies, meaning
 * of the return value) is defined by the implementations, not here.
 */
253 typedef int (g_journal_clean_t)(struct mount *mp);
254 typedef void (g_journal_dirty_t)(struct g_consumer *cp);
255 
/* Binds a file system type name to its pair of hooks. */
256 struct g_journal_desc {
257 	const char		*jd_fstype;
258 	g_journal_clean_t	*jd_clean;
259 	g_journal_dirty_t	*jd_dirty;
260 };
261 
262 /* Supported file systems. */
263 extern const struct g_journal_desc g_journal_ufs;
264 
/*
 * Store the current uptime in '*bt' (a struct bintime pointer) when
 * g_journal_debug is at least 'lvl'; otherwise '*bt' is left untouched.
 * Pair with GJ_TIMER_STOP using the same level.
 */
265 #define	GJ_TIMER_START(lvl, bt)	do {					\
266 	if (g_journal_debug >= (lvl))					\
267 		binuptime(bt);						\
268 } while (0)
/*
 * When g_journal_debug is at least 'lvl', print the printf-style message
 * followed by the time elapsed since '*bt' was recorded, formatted as
 * seconds and microseconds ("<msg>: S.UUUUUUs").  The line is prefixed the
 * same way as GJ_DEBUG output.  '*bt' itself is not modified.  'lvl' is
 * expanded twice.
 */
269 #define	GJ_TIMER_STOP(lvl, bt, ...)	do {				\
270 	if (g_journal_debug >= (lvl)) {					\
271 		struct bintime _bt2;					\
272 		struct timeval _tv;					\
273 									\
274 		binuptime(&_bt2);					\
275 		bintime_sub(&_bt2, bt);					\
276 		bintime2timeval(&_bt2, &_tv);				\
277 		printf("GEOM_JOURNAL");					\
278 		if (g_journal_debug > 0)				\
279 			printf("[%u]", lvl);				\
280 		printf(": ");						\
281 		printf(__VA_ARGS__);					\
282 		printf(": %jd.%06jds\n", (intmax_t)_tv.tv_sec,		\
283 		    (intmax_t)_tv.tv_usec);				\
284 	}								\
285 } while (0)
286 #endif	/* _KERNEL */
287 
/*
 * Provider type bits; GJ_TYPE_COMPLETE is both bits set.
 * NOTE(review): presumably the values of g_journal_metadata.md_type, with
 * COMPLETE meaning one provider carries both data and journal -- confirm.
 */
288 #define	GJ_TYPE_DATA		0x01
289 #define	GJ_TYPE_JOURNAL		0x02
290 #define	GJ_TYPE_COMPLETE	(GJ_TYPE_DATA|GJ_TYPE_JOURNAL)
291 
/*
 * Journal flag bits.
 * NOTE(review): presumably stored in g_journal_metadata.md_flags -- confirm.
 */
292 #define	GJ_FLAG_CLEAN		0x01
293 #define	GJ_FLAG_CHECKSUM	0x02
294 
/*
 * In-memory form of the journal metadata.
 *
 * journal_metadata_encode() serializes it into a 101-byte image: the fields
 * from md_magic through md_provsize in declaration order, packed with no
 * padding and with integers in little-endian order (85 bytes), followed by
 * the 16-byte MD5 digest of those 85 bytes.  This structure itself is not
 * packed, so it must not be copied to or from the image directly.
 */
295 struct g_journal_metadata {
296 	char		md_magic[16];	/* Magic value. */
297 	uint32_t	md_version;	/* Version number. */
298 	uint32_t	md_id;		/* Journal unique ID. */
299 	uint8_t		md_type;	/* Provider type. */
300 	uint64_t	md_jstart;	/* Journal space start offset. */
301 	uint64_t	md_jend;	/* Journal space end offset. */
302 	uint64_t	md_joffset;	/* Last known consistent journal offset. */
303 	uint32_t	md_jid;		/* Last known consistent journal ID. */
304 	uint64_t	md_flags;	/* Journal flags. */
305 	char		md_provider[16]; /* Hardcoded provider. */
306 	uint64_t	md_provsize;	/* Provider's size. */
307 	u_char		md_hash[16];	/* MD5 hash. */
308 };
309 static __inline void
310 journal_metadata_encode(struct g_journal_metadata *md, u_char *data)
311 {
312 	MD5_CTX ctx;
313 
314 	bcopy(md->md_magic, data, 16);
315 	le32enc(data + 16, md->md_version);
316 	le32enc(data + 20, md->md_id);
317 	*(data + 24) = md->md_type;
318 	le64enc(data + 25, md->md_jstart);
319 	le64enc(data + 33, md->md_jend);
320 	le64enc(data + 41, md->md_joffset);
321 	le32enc(data + 49, md->md_jid);
322 	le64enc(data + 53, md->md_flags);
323 	bcopy(md->md_provider, data + 61, 16);
324 	le64enc(data + 77, md->md_provsize);
325 	MD5Init(&ctx);
326 	MD5Update(&ctx, data, 85);
327 	MD5Final(md->md_hash, &ctx);
328 	bcopy(md->md_hash, data + 85, 16);
329 }
330 static __inline int
331 journal_metadata_decode_v0(const u_char *data, struct g_journal_metadata *md)
332 {
333 	MD5_CTX ctx;
334 
335 	md->md_id = le32dec(data + 20);
336 	md->md_type = *(data + 24);
337 	md->md_jstart = le64dec(data + 25);
338 	md->md_jend = le64dec(data + 33);
339 	md->md_joffset = le64dec(data + 41);
340 	md->md_jid = le32dec(data + 49);
341 	md->md_flags = le64dec(data + 53);
342 	bcopy(data + 61, md->md_provider, 16);
343 	md->md_provsize = le64dec(data + 77);
344 	MD5Init(&ctx);
345 	MD5Update(&ctx, data, 85);
346 	MD5Final(md->md_hash, &ctx);
347 	if (bcmp(md->md_hash, data + 85, 16) != 0)
348 		return (EINVAL);
349 	return (0);
350 }
351 static __inline int
352 journal_metadata_decode(const u_char *data, struct g_journal_metadata *md)
353 {
354 	int error;
355 
356 	bcopy(data, md->md_magic, 16);
357 	md->md_version = le32dec(data + 16);
358 	switch (md->md_version) {
359 	case 0:
360 		error = journal_metadata_decode_v0(data, md);
361 		break;
362 	default:
363 		error = EINVAL;
364 		break;
365 	}
366 	return (error);
367 }
368 
369 static __inline void
370 journal_metadata_dump(const struct g_journal_metadata *md)
371 {
372 	static const char hex[] = "0123456789abcdef";
373 	char hash[16 * 2 + 1];
374 	u_int i;
375 
376 	printf("     magic: %s\n", md->md_magic);
377 	printf("   version: %u\n", (u_int)md->md_version);
378 	printf("        id: %u\n", (u_int)md->md_id);
379 	printf("      type: %u\n", (u_int)md->md_type);
380 	printf("     start: %ju\n", (uintmax_t)md->md_jstart);
381 	printf("       end: %ju\n", (uintmax_t)md->md_jend);
382 	printf("   joffset: %ju\n", (uintmax_t)md->md_joffset);
383 	printf("       jid: %u\n", (u_int)md->md_jid);
384 	printf("     flags: %u\n", (u_int)md->md_flags);
385 	printf("hcprovider: %s\n", md->md_provider);
386 	printf("  provsize: %ju\n", (uintmax_t)md->md_provsize);
387 	bzero(hash, sizeof(hash));
388 	for (i = 0; i < 16; i++) {
389 		hash[i * 2] = hex[md->md_hash[i] >> 4];
390 		hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f];
391 	}
392 	printf("  MD5 hash: %s\n", hash);
393 }
394 #endif	/* !_G_JOURNAL_H_ */
395