xref: /freebsd/sys/geom/journal/g_journal.h (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /*-
2  * Copyright (c) 2005-2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #ifndef	_G_JOURNAL_H_
30 #define	_G_JOURNAL_H_
31 
32 #include <sys/endian.h>
33 #include <sys/md5.h>
34 #ifdef _KERNEL
35 #include <sys/bio.h>
36 #endif
37 
38 #define	G_JOURNAL_CLASS_NAME	"JOURNAL"
39 
40 #define	G_JOURNAL_MAGIC		"GEOM::JOURNAL"
41 /*
42  * Version history:
43  * 0 - Initial version number.
44  */
45 #define	G_JOURNAL_VERSION	0
46 
47 #ifdef _KERNEL
48 extern int g_journal_debug;
49 
/*
 * Print a debug message when the global g_journal_debug level is at least
 * 'lvl'.  The message is prefixed with "GEOM_JOURNAL", followed by "[lvl]"
 * whenever g_journal_debug is positive, and terminated with a newline.
 * The variadic arguments are a printf-style format and its arguments; they
 * are only evaluated when the message is actually printed.
 */
#define	GJ_DEBUG(lvl, ...)	do {					\
	if (g_journal_debug < (lvl))					\
		break;							\
	printf("GEOM_JOURNAL");						\
	if (g_journal_debug > 0)					\
		printf("[%u]", lvl);					\
	printf(": ");							\
	printf(__VA_ARGS__);						\
	printf("\n");							\
} while (0)
/*
 * Like GJ_DEBUG, but additionally prints the bio 'bp' (via g_print_bio())
 * after the formatted message, separated from it by a single space.
 * Nothing is printed or evaluated when g_journal_debug is below 'lvl'.
 */
#define	GJ_LOGREQ(lvl, bp, ...)	do {					\
	if (g_journal_debug < (lvl))					\
		break;							\
	printf("GEOM_JOURNAL");						\
	if (g_journal_debug > 0)					\
		printf("[%u]", lvl);					\
	printf(": ");							\
	printf(__VA_ARGS__);						\
	printf(" ");							\
	g_print_bio(bp);						\
	printf("\n");							\
} while (0)
72 
/*
 * Evaluates to non-zero when the current journal offset lies exactly one
 * journal-provider sector past the active journal's start offset and no
 * requests are counted in the current queue.
 */
#define	JEMPTY(sc)							\
	((((sc)->sc_journal_offset - (sc)->sc_jprovider->sectorsize) ==	\
	    (sc)->sc_active.jj_offset) &&				\
	    ((sc)->sc_current_count == 0))
77 
/*
 * Request classes.  GJ_BIO_MASK selects the low four bits, which is wide
 * enough to hold any of the values below.
 * NOTE(review): where this value is stored in a bio is not visible in this
 * header -- confirm against the implementation.
 */
78 #define	GJ_BIO_REGULAR		0x00
79 #define	GJ_BIO_READ		0x01
80 #define	GJ_BIO_JOURNAL		0x02
81 #define	GJ_BIO_COPY		0x03
82 #define	GJ_BIO_MASK		0x0f
83 
/* Compiled out: a per-bio flag bit and its mask in the high four bits. */
84 #if 0
85 #define	GJF_BIO_DONT_FREE	0x10
86 #define	GJF_BIO_MASK		0xf0
87 #endif
88 
/*
 * Per-device flag bits.
 * NOTE(review): presumably stored in g_journal_softc.sc_flags -- confirm.
 */
89 #define	GJF_DEVICE_HARDCODED		0x0001
90 #define	GJF_DEVICE_DESTROY		0x0010
91 #define	GJF_DEVICE_SWITCH		0x0020
92 #define	GJF_DEVICE_BEFORE_SWITCH	0x0040
93 #define	GJF_DEVICE_CLEAN		0x0080
94 #define	GJF_DEVICE_CHECKSUM		0x0100
95 
/* NOTE(review): what this limit bounds is not visible in this header. */
96 #define	GJ_HARD_LIMIT		64
97 
98 /*
99  * We keep pointers to journaled data in bio structure and because we
100  * need to store two off_t values (offset in data provider and offset in
101  * journal), we have to borrow bio_completed field for this.
102  */
103 #define	bio_joffset	bio_completed
104 /*
105  * Use bio_caller1 field as a pointer in queue.
106  */
107 #define	bio_next	bio_caller1
108 
109 /*
110  * There are two such structures maintained inside each journaled device.
111  * One describes the active part of the journal, where recent requests are
 * stored.
112  * The second describes the last consistent part of the journal with requests
113  * that are copied to the destination provider.
 *
 * jj_queue is the head of a singly-linked list of bios chained through
 * bio_next (see the GJQ_* macros); jj_offset is an offset within the journal.
114  */
115 struct g_journal_journal {
116 	struct bio	*jj_queue;	/* Cached journal entries. */
117 	off_t		 jj_offset;	/* Journal's start offset. */
118 };
119 
/*
 * Per-device state of a journaled device.
 *
 * NOTE(review): the field descriptions below are inferred from field names
 * and types only; this header does not show how they are used.  Confirm
 * against the implementation before relying on them.
 */
120 struct g_journal_softc {
121 	uint32_t	 sc_id;			/* presumably matches md_id -- TODO confirm */
122 	uint8_t		 sc_type;		/* presumably a GJ_TYPE_* mask -- TODO confirm */
123 	uint8_t		 sc_orig_type;
124 	struct g_geom	*sc_geom;
125 	u_int		 sc_flags;		/* presumably GJF_DEVICE_* bits -- TODO confirm */
126 	struct mtx	 sc_mtx;
127 	off_t		 sc_mediasize;
128 	u_int		 sc_sectorsize;
	/* Bit values for sc_bio_flush. */
129 #define	GJ_FLUSH_DATA		0x01
130 #define	GJ_FLUSH_JOURNAL	0x02
131 	u_int		 sc_bio_flush;
132 
133 	uint32_t	 sc_journal_id;
134 	uint32_t	 sc_journal_next_id;
135 	int		 sc_journal_copying;
136 	off_t		 sc_journal_offset;
	/*
	 * NOTE(review): declared off_t although the other journal IDs are
	 * uint32_t -- confirm this is intentional.
	 */
137 	off_t		 sc_journal_previous_id;
138 
139 	struct bio_queue_head sc_back_queue;
140 	struct bio_queue_head sc_regular_queue;
141 
142 	struct bio_queue_head sc_delayed_queue;
143 	int		 sc_delayed_count;
144 
	/* The three queues below are plain bio pointers (GJQ_* list heads). */
145 	struct bio	*sc_current_queue;
146 	int		 sc_current_count;
147 
148 	struct bio	*sc_flush_queue;
149 	int		 sc_flush_count;
150 	int		 sc_flush_in_progress;
151 
152 	struct bio	*sc_copy_queue;
153 	int		 sc_copy_in_progress;
154 
	/* Consumers whose providers are reached via sc_dprovider/sc_jprovider. */
155 	struct g_consumer *sc_dconsumer;
156 	struct g_consumer *sc_jconsumer;
157 
158 	struct g_journal_journal sc_inactive;
159 	struct g_journal_journal sc_active;
160 
161 	off_t		 sc_jstart;	/* Journal space start offset. */
162 	off_t		 sc_jend;	/* Journal space end offset. */
163 
164 	struct callout	 sc_callout;
165 	struct proc	*sc_worker;
166 };
/*
 * Member-access shorthands: (sc)->sc_dprovider expands to
 * (sc)->sc_dconsumer->provider, (sc)->sc_jprovider to
 * (sc)->sc_jconsumer->provider, and (sc)->sc_name to the name of the
 * provider behind sc_dconsumer.  The consumer pointer must be non-NULL when
 * any of these is used.
 */
167 #define	sc_dprovider	sc_dconsumer->provider
168 #define	sc_jprovider	sc_jconsumer->provider
169 #define	sc_name		sc_dprovider->name
170 
/*
 * Minimal singly-linked bio list, chained through bio_next.  'head' must be
 * an lvalue of type struct bio *; an empty list is a NULL head.
 */

/* Push 'bp' onto the front of the list. */
#define	GJQ_INSERT_HEAD(head, bp)	do {				\
	(bp)->bio_next = (head);					\
	(head) = (bp);							\
} while (0)

/*
 * Link 'bp' directly behind 'pbp'.  A NULL 'pbp' means "no predecessor",
 * in which case 'bp' becomes the new head of the list.
 */
#define	GJQ_INSERT_AFTER(head, bp, pbp)	do {				\
	if ((pbp) != NULL) {						\
		(bp)->bio_next = (pbp)->bio_next;			\
		(pbp)->bio_next = (bp);					\
	} else								\
		GJQ_INSERT_HEAD(head, bp);				\
} while (0)

/* First element of the list (NULL when the list is empty). */
#define	GJQ_FIRST(head)	(head)
/*
 * Unlink 'bp' from the list rooted at 'head' and clear its bio_next.
 * If 'bp' is the head, the head advances to the next element; otherwise the
 * list is walked to find the predecessor of 'bp'.  'bp' is expected to be on
 * the list (asserted with KASSERT), and the list must not be empty.
 */
#define	GJQ_REMOVE(head, bp)	do {					\
	struct bio *_bp;						\
									\
	if ((head) != (bp)) {						\
		_bp = (head);						\
		while (_bp->bio_next != NULL &&				\
		    _bp->bio_next != (bp))				\
			_bp = _bp->bio_next;				\
		KASSERT(_bp->bio_next != NULL, ("NULL bio_next"));	\
		KASSERT(_bp->bio_next == (bp), ("bio_next != bp"));	\
		_bp->bio_next = (bp)->bio_next;				\
	} else {							\
		(head) = (bp)->bio_next;				\
	}								\
	(bp)->bio_next = NULL;						\
} while (0)
/*
 * Iterate over the bio_next-linked list starting at 'head', assigning each
 * element in turn to 'bp'.  'bp' is NULL once the loop runs to completion.
 * The next pointer is read after the body runs, so the body must not clear
 * or redirect (bp)->bio_next.
 */
201 #define GJQ_FOREACH(head, bp)						\
202 	for ((bp) = (head); (bp) != NULL; (bp) = (bp)->bio_next)
203 
/* Magic string identifying a journal header. */
204 #define	GJ_HEADER_MAGIC	"GJHDR"
205 
/*
 * Journal header.  The structure is packed; jh_magic is sized to hold
 * GJ_HEADER_MAGIC including its terminating NUL.
 * NOTE(review): byte order of the ID fields as stored is not visible in this
 * header -- confirm against the code that reads and writes it.
 */
206 struct g_journal_header {
207 	char		jh_magic[sizeof(GJ_HEADER_MAGIC)];
208 	uint32_t	jh_journal_id;
209 	uint32_t	jh_journal_next_id;
210 } __packed;
211 
/*
 * One entry of a journal record header (see jrh_entries): three packed
 * 64-bit values.
 * NOTE(review): judging by the names, je_joffset is the offset in the
 * journal, je_offset the offset in the data provider and je_length the
 * length of the data -- confirm against the implementation.
 */
212 struct g_journal_entry {
213 	uint64_t	je_joffset;
214 	uint64_t	je_offset;
215 	uint64_t	je_length;
216 } __packed;
217 
/* Magic string identifying a journal record header. */
#define	GJ_RECORD_HEADER_MAGIC		"GJRHDR"
/* Number of entries a single record header can describe. */
#define	GJ_RECORD_HEADER_NENTRIES	(20)
/*
 * Upper bound on the size of one record: one journal-provider sector plus
 * GJ_RECORD_HEADER_NENTRIES blocks of MAXPHYS bytes each.
 */
#define	GJ_RECORD_MAX_SIZE(sc)						\
	((sc)->sc_jprovider->sectorsize +				\
	    GJ_RECORD_HEADER_NENTRIES * MAXPHYS)
/*
 * If a maximum-size record starting at 'offset' would reach or pass the end
 * of the journal space, reset 'offset' (an lvalue) to the start of the
 * journal space and log that at debug level 2.
 */
#define	GJ_VALIDATE_OFFSET(offset, sc)	do {				\
	if ((offset) + GJ_RECORD_MAX_SIZE(sc) < (sc)->sc_jend)		\
		break;							\
	(offset) = (sc)->sc_jstart;					\
	GJ_DEBUG(2, "Starting from the begining (%s).",			\
	    (sc)->sc_name);						\
} while (0)
229 
/*
 * Header of one journal record (packed).  jrh_magic is sized to hold
 * GJ_RECORD_HEADER_MAGIC including its terminating NUL, and jrh_entries has
 * room for GJ_RECORD_HEADER_NENTRIES entries.
 * NOTE(review): jrh_nentries presumably counts the entries of jrh_entries in
 * use, and jrh_sum presumably holds a checksum used when checksumming is
 * enabled -- confirm against the implementation.
 */
230 struct g_journal_record_header {
231 	char		jrh_magic[sizeof(GJ_RECORD_HEADER_MAGIC)];
232 	uint32_t	jrh_journal_id;
233 	uint16_t	jrh_nentries;
234 	u_char		jrh_sum[8];
235 	struct g_journal_entry jrh_entries[GJ_RECORD_HEADER_NENTRIES];
236 } __packed;
237 
/*
 * Per-file-system callback types: g_journal_clean_t takes a mount point and
 * returns an int; g_journal_dirty_t takes a GEOM consumer and returns nothing.
 * NOTE(review): the exact contract of each callback (when it is invoked,
 * meaning of the return value) is not visible in this header.
 */
238 typedef int (g_journal_clean_t)(struct mount *mp);
239 typedef void (g_journal_dirty_t)(struct g_consumer *cp);
240 
/*
 * Descriptor binding a file system type name to its clean/dirty callbacks.
 */
241 struct g_journal_desc {
242 	const char		*jd_fstype;	/* File system type name. */
243 	g_journal_clean_t	*jd_clean;	/* "Clean" callback. */
244 	g_journal_dirty_t	*jd_dirty;	/* "Dirty" callback. */
245 };
246 
247 /* Supported file systems. */
248 extern const struct g_journal_desc g_journal_ufs;
249 
/*
 * Record the current uptime into '*bt' (a struct bintime pointer), but only
 * when g_journal_debug is at least 'lvl'.  Pair with GJ_TIMER_STOP using the
 * same level and the same 'bt'.
 */
#define	GJ_TIMER_START(lvl, bt)	do {					\
	if (g_journal_debug < (lvl))					\
		break;							\
	binuptime(bt);							\
} while (0)
/*
 * When g_journal_debug is at least 'lvl', print the formatted message
 * followed by the time elapsed since '*bt' was recorded, as
 * "<seconds>.<microseconds>s".  The output carries the usual "GEOM_JOURNAL"
 * prefix, plus "[lvl]" whenever g_journal_debug is positive.
 */
#define	GJ_TIMER_STOP(lvl, bt, ...)	do {				\
	struct bintime _bt2;						\
	struct timeval _tv;						\
									\
	if (g_journal_debug < (lvl))					\
		break;							\
	binuptime(&_bt2);						\
	bintime_sub(&_bt2, bt);						\
	bintime2timeval(&_bt2, &_tv);					\
	printf("GEOM_JOURNAL");						\
	if (g_journal_debug > 0)					\
		printf("[%u]", lvl);					\
	printf(": ");							\
	printf(__VA_ARGS__);						\
	printf(": %jd.%06jds\n", (intmax_t)_tv.tv_sec,			\
	    (intmax_t)_tv.tv_usec);					\
} while (0)
271 #endif	/* _KERNEL */
272 
/*
 * Provider type bits; GJ_TYPE_COMPLETE is both bits set.
 * NOTE(review): presumably the values of g_journal_metadata.md_type --
 * confirm.
 */
273 #define	GJ_TYPE_DATA		0x01
274 #define	GJ_TYPE_JOURNAL		0x02
275 #define	GJ_TYPE_COMPLETE	(GJ_TYPE_DATA|GJ_TYPE_JOURNAL)
276 
/*
 * Journal flag bits.
 * NOTE(review): presumably the values of g_journal_metadata.md_flags --
 * confirm.
 */
277 #define	GJ_FLAG_CLEAN		0x01
278 #define	GJ_FLAG_CHECKSUM	0x02
279 
/*
 * In-memory form of the journal metadata.  This structure is not written out
 * directly: journal_metadata_encode() serializes it field by field, in
 * declaration order, with integers in little-endian byte order and no
 * padding, into a 101-byte buffer whose last 16 bytes are the MD5 hash of
 * the preceding 85 bytes.
 *
 * md_magic and md_provider are fixed 16-byte arrays that are copied verbatim
 * to and from that buffer, so they are not guaranteed to be NUL-terminated.
 */
280 struct g_journal_metadata {
281 	char		md_magic[16];	/* Magic value. */
282 	uint32_t	md_version;	/* Version number. */
283 	uint32_t	md_id;		/* Journal unique ID. */
284 	uint8_t		md_type;	/* Provider type. */
285 	uint64_t	md_jstart;	/* Journal space start offset. */
286 	uint64_t	md_jend;	/* Journal space end offset. */
287 	uint64_t	md_joffset;	/* Last known consistent journal offset. */
288 	uint32_t	md_jid;		/* Last known consistent journal ID. */
289 	uint64_t	md_flags;	/* Journal flags. */
290 	char		md_provider[16]; /* Hardcoded provider. */
291 	uint64_t	md_provsize;	/* Provider's size. */
292 	u_char		md_hash[16];	/* MD5 hash. */
293 };
294 static __inline void
295 journal_metadata_encode(struct g_journal_metadata *md, u_char *data)
296 {
297 	MD5_CTX ctx;
298 
299 	bcopy(md->md_magic, data, 16);
300 	le32enc(data + 16, md->md_version);
301 	le32enc(data + 20, md->md_id);
302 	*(data + 24) = md->md_type;
303 	le64enc(data + 25, md->md_jstart);
304 	le64enc(data + 33, md->md_jend);
305 	le64enc(data + 41, md->md_joffset);
306 	le32enc(data + 49, md->md_jid);
307 	le64enc(data + 53, md->md_flags);
308 	bcopy(md->md_provider, data + 61, 16);
309 	le64enc(data + 77, md->md_provsize);
310 	MD5Init(&ctx);
311 	MD5Update(&ctx, data, 85);
312 	MD5Final(md->md_hash, &ctx);
313 	bcopy(md->md_hash, data + 85, 16);
314 }
315 static __inline int
316 journal_metadata_decode_v0(const u_char *data, struct g_journal_metadata *md)
317 {
318 	MD5_CTX ctx;
319 
320 	md->md_id = le32dec(data + 20);
321 	md->md_type = *(data + 24);
322 	md->md_jstart = le64dec(data + 25);
323 	md->md_jend = le64dec(data + 33);
324 	md->md_joffset = le64dec(data + 41);
325 	md->md_jid = le32dec(data + 49);
326 	md->md_flags = le64dec(data + 53);
327 	bcopy(data + 61, md->md_provider, 16);
328 	md->md_provsize = le64dec(data + 77);
329 	MD5Init(&ctx);
330 	MD5Update(&ctx, data, 85);
331 	MD5Final(md->md_hash, &ctx);
332 	if (bcmp(md->md_hash, data + 85, 16) != 0)
333 		return (EINVAL);
334 	return (0);
335 }
336 static __inline int
337 journal_metadata_decode(const u_char *data, struct g_journal_metadata *md)
338 {
339 	int error;
340 
341 	bcopy(data, md->md_magic, 16);
342 	md->md_version = le32dec(data + 16);
343 	switch (md->md_version) {
344 	case 0:
345 		error = journal_metadata_decode_v0(data, md);
346 		break;
347 	default:
348 		error = EINVAL;
349 		break;
350 	}
351 	return (error);
352 }
353 
354 static __inline void
355 journal_metadata_dump(const struct g_journal_metadata *md)
356 {
357 	static const char hex[] = "0123456789abcdef";
358 	char hash[16 * 2 + 1];
359 	u_int i;
360 
361 	printf("     magic: %s\n", md->md_magic);
362 	printf("   version: %u\n", (u_int)md->md_version);
363 	printf("        id: %u\n", (u_int)md->md_id);
364 	printf("      type: %u\n", (u_int)md->md_type);
365 	printf("     start: %ju\n", (uintmax_t)md->md_jstart);
366 	printf("       end: %ju\n", (uintmax_t)md->md_jend);
367 	printf("   joffset: %ju\n", (uintmax_t)md->md_joffset);
368 	printf("       jid: %u\n", (u_int)md->md_jid);
369 	printf("     flags: %u\n", (u_int)md->md_flags);
370 	printf("hcprovider: %s\n", md->md_provider);
371 	printf("  provsize: %ju\n", (uintmax_t)md->md_provsize);
372 	bzero(hash, sizeof(hash));
373 	for (i = 0; i < 16; i++) {
374 		hash[i * 2] = hex[md->md_hash[i] >> 4];
375 		hash[i * 2 + 1] = hex[md->md_hash[i] & 0x0f];
376 	}
377 	printf("  MD5 hash: %s\n", hash);
378 }
379 #endif	/* !_G_JOURNAL_H_ */
380