xref: /freebsd/sys/contrib/openzfs/module/zfs/blkptr.c (revision da5137abdf463bb5fee85061958a14dd12bc043e)
1eda14cbcSMatt Macy /*
2eda14cbcSMatt Macy  * CDDL HEADER START
3eda14cbcSMatt Macy  *
4eda14cbcSMatt Macy  * This file and its contents are supplied under the terms of the
5eda14cbcSMatt Macy  * Common Development and Distribution License ("CDDL"), version 1.0.
6eda14cbcSMatt Macy  * You may only use this file in accordance with the terms of version
7eda14cbcSMatt Macy  * 1.0 of the CDDL.
8eda14cbcSMatt Macy  *
9eda14cbcSMatt Macy  * A full copy of the text of the CDDL should have accompanied this
10eda14cbcSMatt Macy  * source.  A copy of the CDDL is also available via the Internet at
11eda14cbcSMatt Macy  * http://www.illumos.org/license/CDDL.
12eda14cbcSMatt Macy  *
13eda14cbcSMatt Macy  * CDDL HEADER END
14eda14cbcSMatt Macy  */
15eda14cbcSMatt Macy 
16eda14cbcSMatt Macy /*
17eda14cbcSMatt Macy  * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
18eda14cbcSMatt Macy  */
19eda14cbcSMatt Macy 
20eda14cbcSMatt Macy #include <sys/blkptr.h>
21eda14cbcSMatt Macy #include <sys/zfs_context.h>
22eda14cbcSMatt Macy #include <sys/zio.h>
23eda14cbcSMatt Macy #include <sys/zio_compress.h>
24eda14cbcSMatt Macy 
25eda14cbcSMatt Macy /*
26eda14cbcSMatt Macy  * Embedded-data Block Pointers
27eda14cbcSMatt Macy  *
28eda14cbcSMatt Macy  * Normally, block pointers point (via their DVAs) to a block which holds data.
29eda14cbcSMatt Macy  * If the data that we need to store is very small, this is an inefficient
30eda14cbcSMatt Macy  * use of space, because a block must be at minimum 1 sector (typically 512
31eda14cbcSMatt Macy  * bytes or 4KB).  Additionally, reading these small blocks tends to generate
32eda14cbcSMatt Macy  * more random reads.
33eda14cbcSMatt Macy  *
34eda14cbcSMatt Macy  * Embedded-data Block Pointers allow small pieces of data (the "payload",
35eda14cbcSMatt Macy  * up to 112 bytes) to be stored in the block pointer itself, instead of
36eda14cbcSMatt Macy  * being pointed to.  The "Pointer" part of this name is a bit of a
37eda14cbcSMatt Macy  * misnomer, as nothing is pointed to.
38eda14cbcSMatt Macy  *
39eda14cbcSMatt Macy  * BP_EMBEDDED_TYPE_DATA block pointers allow highly-compressible data to
40eda14cbcSMatt Macy  * be embedded in the block pointer.  The logic for this is handled in
41eda14cbcSMatt Macy  * the SPA, by the zio pipeline.  Therefore most code outside the zio
42eda14cbcSMatt Macy  * pipeline doesn't need special-cases to handle these block pointers.
43eda14cbcSMatt Macy  *
44eda14cbcSMatt Macy  * See spa.h for details on the exact layout of embedded block pointers.
45eda14cbcSMatt Macy  */
46eda14cbcSMatt Macy 
47eda14cbcSMatt Macy void
48eda14cbcSMatt Macy encode_embedded_bp_compressed(blkptr_t *bp, void *data,
49eda14cbcSMatt Macy     enum zio_compress comp, int uncompressed_size, int compressed_size)
50eda14cbcSMatt Macy {
51eda14cbcSMatt Macy 	uint64_t *bp64 = (uint64_t *)bp;
52eda14cbcSMatt Macy 	uint64_t w = 0;
53eda14cbcSMatt Macy 	uint8_t *data8 = data;
54eda14cbcSMatt Macy 
55eda14cbcSMatt Macy 	ASSERT3U(compressed_size, <=, BPE_PAYLOAD_SIZE);
56eda14cbcSMatt Macy 	ASSERT(uncompressed_size == compressed_size ||
57eda14cbcSMatt Macy 	    comp != ZIO_COMPRESS_OFF);
58eda14cbcSMatt Macy 	ASSERT3U(comp, >=, ZIO_COMPRESS_OFF);
59eda14cbcSMatt Macy 	ASSERT3U(comp, <, ZIO_COMPRESS_FUNCTIONS);
60eda14cbcSMatt Macy 
61*da5137abSMartin Matuska 	memset(bp, 0, sizeof (*bp));
62eda14cbcSMatt Macy 	BP_SET_EMBEDDED(bp, B_TRUE);
63eda14cbcSMatt Macy 	BP_SET_COMPRESS(bp, comp);
64eda14cbcSMatt Macy 	BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
65eda14cbcSMatt Macy 	BPE_SET_LSIZE(bp, uncompressed_size);
66eda14cbcSMatt Macy 	BPE_SET_PSIZE(bp, compressed_size);
67eda14cbcSMatt Macy 
68eda14cbcSMatt Macy 	/*
69eda14cbcSMatt Macy 	 * Encode the byte array into the words of the block pointer.
70eda14cbcSMatt Macy 	 * First byte goes into low bits of first word (little endian).
71eda14cbcSMatt Macy 	 */
72eda14cbcSMatt Macy 	for (int i = 0; i < compressed_size; i++) {
73eda14cbcSMatt Macy 		BF64_SET(w, (i % sizeof (w)) * NBBY, NBBY, data8[i]);
74eda14cbcSMatt Macy 		if (i % sizeof (w) == sizeof (w) - 1) {
75eda14cbcSMatt Macy 			/* we've reached the end of a word */
76eda14cbcSMatt Macy 			ASSERT3P(bp64, <, bp + 1);
77eda14cbcSMatt Macy 			*bp64 = w;
78eda14cbcSMatt Macy 			bp64++;
79eda14cbcSMatt Macy 			if (!BPE_IS_PAYLOADWORD(bp, bp64))
80eda14cbcSMatt Macy 				bp64++;
81eda14cbcSMatt Macy 			w = 0;
82eda14cbcSMatt Macy 		}
83eda14cbcSMatt Macy 	}
84eda14cbcSMatt Macy 	/* write last partial word */
85eda14cbcSMatt Macy 	if (bp64 < (uint64_t *)(bp + 1))
86eda14cbcSMatt Macy 		*bp64 = w;
87eda14cbcSMatt Macy }
88eda14cbcSMatt Macy 
89eda14cbcSMatt Macy /*
90eda14cbcSMatt Macy  * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be
91eda14cbcSMatt Macy  * more than BPE_PAYLOAD_SIZE bytes).
92eda14cbcSMatt Macy  */
93eda14cbcSMatt Macy void
94eda14cbcSMatt Macy decode_embedded_bp_compressed(const blkptr_t *bp, void *buf)
95eda14cbcSMatt Macy {
96eda14cbcSMatt Macy 	int psize;
97eda14cbcSMatt Macy 	uint8_t *buf8 = buf;
98eda14cbcSMatt Macy 	uint64_t w = 0;
99eda14cbcSMatt Macy 	const uint64_t *bp64 = (const uint64_t *)bp;
100eda14cbcSMatt Macy 
101eda14cbcSMatt Macy 	ASSERT(BP_IS_EMBEDDED(bp));
102eda14cbcSMatt Macy 
103eda14cbcSMatt Macy 	psize = BPE_GET_PSIZE(bp);
104eda14cbcSMatt Macy 
105eda14cbcSMatt Macy 	/*
106eda14cbcSMatt Macy 	 * Decode the words of the block pointer into the byte array.
107eda14cbcSMatt Macy 	 * Low bits of first word are the first byte (little endian).
108eda14cbcSMatt Macy 	 */
109eda14cbcSMatt Macy 	for (int i = 0; i < psize; i++) {
110eda14cbcSMatt Macy 		if (i % sizeof (w) == 0) {
111eda14cbcSMatt Macy 			/* beginning of a word */
112eda14cbcSMatt Macy 			ASSERT3P(bp64, <, bp + 1);
113eda14cbcSMatt Macy 			w = *bp64;
114eda14cbcSMatt Macy 			bp64++;
115eda14cbcSMatt Macy 			if (!BPE_IS_PAYLOADWORD(bp, bp64))
116eda14cbcSMatt Macy 				bp64++;
117eda14cbcSMatt Macy 		}
118eda14cbcSMatt Macy 		buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY);
119eda14cbcSMatt Macy 	}
120eda14cbcSMatt Macy }
121eda14cbcSMatt Macy 
122eda14cbcSMatt Macy /*
123eda14cbcSMatt Macy  * Fill in the buffer with the (decompressed) payload of the embedded
124eda14cbcSMatt Macy  * blkptr_t.  Takes into account compression and byteorder (the payload is
125eda14cbcSMatt Macy  * treated as a stream of bytes).
126eda14cbcSMatt Macy  * Return 0 on success, or ENOSPC if it won't fit in the buffer.
127eda14cbcSMatt Macy  */
128eda14cbcSMatt Macy int
129eda14cbcSMatt Macy decode_embedded_bp(const blkptr_t *bp, void *buf, int buflen)
130eda14cbcSMatt Macy {
131eda14cbcSMatt Macy 	int lsize, psize;
132eda14cbcSMatt Macy 
133eda14cbcSMatt Macy 	ASSERT(BP_IS_EMBEDDED(bp));
134eda14cbcSMatt Macy 
135eda14cbcSMatt Macy 	lsize = BPE_GET_LSIZE(bp);
136eda14cbcSMatt Macy 	psize = BPE_GET_PSIZE(bp);
137eda14cbcSMatt Macy 
138eda14cbcSMatt Macy 	if (lsize > buflen)
139eda14cbcSMatt Macy 		return (SET_ERROR(ENOSPC));
140eda14cbcSMatt Macy 	ASSERT3U(lsize, ==, buflen);
141eda14cbcSMatt Macy 
142eda14cbcSMatt Macy 	if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
143eda14cbcSMatt Macy 		uint8_t dstbuf[BPE_PAYLOAD_SIZE];
144eda14cbcSMatt Macy 		decode_embedded_bp_compressed(bp, dstbuf);
145eda14cbcSMatt Macy 		VERIFY0(zio_decompress_data_buf(BP_GET_COMPRESS(bp),
146eda14cbcSMatt Macy 		    dstbuf, buf, psize, buflen, NULL));
147eda14cbcSMatt Macy 	} else {
148eda14cbcSMatt Macy 		ASSERT3U(lsize, ==, psize);
149eda14cbcSMatt Macy 		decode_embedded_bp_compressed(bp, buf);
150eda14cbcSMatt Macy 	}
151eda14cbcSMatt Macy 
152eda14cbcSMatt Macy 	return (0);
153eda14cbcSMatt Macy }
154