1*61145dc2SMartin Matuska // SPDX-License-Identifier: CDDL-1.0
2eda14cbcSMatt Macy /*
3eda14cbcSMatt Macy * CDDL HEADER START
4eda14cbcSMatt Macy *
5eda14cbcSMatt Macy * This file and its contents are supplied under the terms of the
6eda14cbcSMatt Macy * Common Development and Distribution License ("CDDL"), version 1.0.
7eda14cbcSMatt Macy * You may only use this file in accordance with the terms of version
8eda14cbcSMatt Macy * 1.0 of the CDDL.
9eda14cbcSMatt Macy *
10eda14cbcSMatt Macy * A full copy of the text of the CDDL should have accompanied this
11eda14cbcSMatt Macy * source. A copy of the CDDL is also available via the Internet at
12eda14cbcSMatt Macy * http://www.illumos.org/license/CDDL.
13eda14cbcSMatt Macy *
14eda14cbcSMatt Macy * CDDL HEADER END
15eda14cbcSMatt Macy */
16eda14cbcSMatt Macy
17eda14cbcSMatt Macy /*
18eda14cbcSMatt Macy * Copyright (c) 2013, 2016 by Delphix. All rights reserved.
19eda14cbcSMatt Macy */
20eda14cbcSMatt Macy
21eda14cbcSMatt Macy #include <sys/blkptr.h>
22eda14cbcSMatt Macy #include <sys/zfs_context.h>
23eda14cbcSMatt Macy #include <sys/zio.h>
24eda14cbcSMatt Macy #include <sys/zio_compress.h>
25eda14cbcSMatt Macy
26eda14cbcSMatt Macy /*
27eda14cbcSMatt Macy * Embedded-data Block Pointers
28eda14cbcSMatt Macy *
29eda14cbcSMatt Macy * Normally, block pointers point (via their DVAs) to a block which holds data.
30eda14cbcSMatt Macy * If the data that we need to store is very small, this is an inefficient
31eda14cbcSMatt Macy * use of space, because a block must be at minimum 1 sector (typically 512
32eda14cbcSMatt Macy * bytes or 4KB). Additionally, reading these small blocks tends to generate
33eda14cbcSMatt Macy * more random reads.
34eda14cbcSMatt Macy *
35eda14cbcSMatt Macy * Embedded-data Block Pointers allow small pieces of data (the "payload",
36eda14cbcSMatt Macy * up to 112 bytes) to be stored in the block pointer itself, instead of
37eda14cbcSMatt Macy * being pointed to. The "Pointer" part of this name is a bit of a
38eda14cbcSMatt Macy * misnomer, as nothing is pointed to.
39eda14cbcSMatt Macy *
40eda14cbcSMatt Macy * BP_EMBEDDED_TYPE_DATA block pointers allow highly-compressible data to
41eda14cbcSMatt Macy * be embedded in the block pointer. The logic for this is handled in
42eda14cbcSMatt Macy * the SPA, by the zio pipeline. Therefore most code outside the zio
43eda14cbcSMatt Macy * pipeline doesn't need special-cases to handle these block pointers.
44eda14cbcSMatt Macy *
45eda14cbcSMatt Macy * See spa.h for details on the exact layout of embedded block pointers.
46eda14cbcSMatt Macy */
47eda14cbcSMatt Macy
48eda14cbcSMatt Macy void
encode_embedded_bp_compressed(blkptr_t * bp,void * data,enum zio_compress comp,int uncompressed_size,int compressed_size)49eda14cbcSMatt Macy encode_embedded_bp_compressed(blkptr_t *bp, void *data,
50eda14cbcSMatt Macy enum zio_compress comp, int uncompressed_size, int compressed_size)
51eda14cbcSMatt Macy {
52eda14cbcSMatt Macy uint64_t *bp64 = (uint64_t *)bp;
53eda14cbcSMatt Macy uint64_t w = 0;
54eda14cbcSMatt Macy uint8_t *data8 = data;
55eda14cbcSMatt Macy
56eda14cbcSMatt Macy ASSERT3U(compressed_size, <=, BPE_PAYLOAD_SIZE);
57eda14cbcSMatt Macy ASSERT(uncompressed_size == compressed_size ||
58eda14cbcSMatt Macy comp != ZIO_COMPRESS_OFF);
59eda14cbcSMatt Macy ASSERT3U(comp, >=, ZIO_COMPRESS_OFF);
60eda14cbcSMatt Macy ASSERT3U(comp, <, ZIO_COMPRESS_FUNCTIONS);
61eda14cbcSMatt Macy
62da5137abSMartin Matuska memset(bp, 0, sizeof (*bp));
63eda14cbcSMatt Macy BP_SET_EMBEDDED(bp, B_TRUE);
64eda14cbcSMatt Macy BP_SET_COMPRESS(bp, comp);
65eda14cbcSMatt Macy BP_SET_BYTEORDER(bp, ZFS_HOST_BYTEORDER);
66eda14cbcSMatt Macy BPE_SET_LSIZE(bp, uncompressed_size);
67eda14cbcSMatt Macy BPE_SET_PSIZE(bp, compressed_size);
68eda14cbcSMatt Macy
69eda14cbcSMatt Macy /*
70eda14cbcSMatt Macy * Encode the byte array into the words of the block pointer.
71eda14cbcSMatt Macy * First byte goes into low bits of first word (little endian).
72eda14cbcSMatt Macy */
73eda14cbcSMatt Macy for (int i = 0; i < compressed_size; i++) {
74eda14cbcSMatt Macy BF64_SET(w, (i % sizeof (w)) * NBBY, NBBY, data8[i]);
75eda14cbcSMatt Macy if (i % sizeof (w) == sizeof (w) - 1) {
76eda14cbcSMatt Macy /* we've reached the end of a word */
77eda14cbcSMatt Macy ASSERT3P(bp64, <, bp + 1);
78eda14cbcSMatt Macy *bp64 = w;
79eda14cbcSMatt Macy bp64++;
80eda14cbcSMatt Macy if (!BPE_IS_PAYLOADWORD(bp, bp64))
81eda14cbcSMatt Macy bp64++;
82eda14cbcSMatt Macy w = 0;
83eda14cbcSMatt Macy }
84eda14cbcSMatt Macy }
85eda14cbcSMatt Macy /* write last partial word */
86eda14cbcSMatt Macy if (bp64 < (uint64_t *)(bp + 1))
87eda14cbcSMatt Macy *bp64 = w;
88eda14cbcSMatt Macy }
89eda14cbcSMatt Macy
90eda14cbcSMatt Macy /*
91eda14cbcSMatt Macy * buf must be at least BPE_GET_PSIZE(bp) bytes long (which will never be
92eda14cbcSMatt Macy * more than BPE_PAYLOAD_SIZE bytes).
93eda14cbcSMatt Macy */
94eda14cbcSMatt Macy void
decode_embedded_bp_compressed(const blkptr_t * bp,void * buf)95eda14cbcSMatt Macy decode_embedded_bp_compressed(const blkptr_t *bp, void *buf)
96eda14cbcSMatt Macy {
97eda14cbcSMatt Macy int psize;
98eda14cbcSMatt Macy uint8_t *buf8 = buf;
99eda14cbcSMatt Macy uint64_t w = 0;
100eda14cbcSMatt Macy const uint64_t *bp64 = (const uint64_t *)bp;
101eda14cbcSMatt Macy
102eda14cbcSMatt Macy ASSERT(BP_IS_EMBEDDED(bp));
103eda14cbcSMatt Macy
104eda14cbcSMatt Macy psize = BPE_GET_PSIZE(bp);
105eda14cbcSMatt Macy
106eda14cbcSMatt Macy /*
107eda14cbcSMatt Macy * Decode the words of the block pointer into the byte array.
108eda14cbcSMatt Macy * Low bits of first word are the first byte (little endian).
109eda14cbcSMatt Macy */
110eda14cbcSMatt Macy for (int i = 0; i < psize; i++) {
111eda14cbcSMatt Macy if (i % sizeof (w) == 0) {
112eda14cbcSMatt Macy /* beginning of a word */
113eda14cbcSMatt Macy ASSERT3P(bp64, <, bp + 1);
114eda14cbcSMatt Macy w = *bp64;
115eda14cbcSMatt Macy bp64++;
116eda14cbcSMatt Macy if (!BPE_IS_PAYLOADWORD(bp, bp64))
117eda14cbcSMatt Macy bp64++;
118eda14cbcSMatt Macy }
119eda14cbcSMatt Macy buf8[i] = BF64_GET(w, (i % sizeof (w)) * NBBY, NBBY);
120eda14cbcSMatt Macy }
121eda14cbcSMatt Macy }
122eda14cbcSMatt Macy
123eda14cbcSMatt Macy /*
124eda14cbcSMatt Macy * Fill in the buffer with the (decompressed) payload of the embedded
125eda14cbcSMatt Macy * blkptr_t. Takes into account compression and byteorder (the payload is
126eda14cbcSMatt Macy * treated as a stream of bytes).
127eda14cbcSMatt Macy * Return 0 on success, or ENOSPC if it won't fit in the buffer.
128eda14cbcSMatt Macy */
129eda14cbcSMatt Macy int
decode_embedded_bp(const blkptr_t * bp,void * buf,int buflen)130eda14cbcSMatt Macy decode_embedded_bp(const blkptr_t *bp, void *buf, int buflen)
131eda14cbcSMatt Macy {
132eda14cbcSMatt Macy int lsize, psize;
133eda14cbcSMatt Macy
134eda14cbcSMatt Macy ASSERT(BP_IS_EMBEDDED(bp));
135eda14cbcSMatt Macy
136eda14cbcSMatt Macy lsize = BPE_GET_LSIZE(bp);
137eda14cbcSMatt Macy psize = BPE_GET_PSIZE(bp);
138eda14cbcSMatt Macy
139eda14cbcSMatt Macy if (lsize > buflen)
140eda14cbcSMatt Macy return (SET_ERROR(ENOSPC));
141eda14cbcSMatt Macy ASSERT3U(lsize, ==, buflen);
142eda14cbcSMatt Macy
143eda14cbcSMatt Macy if (BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF) {
144eda14cbcSMatt Macy uint8_t dstbuf[BPE_PAYLOAD_SIZE];
145eda14cbcSMatt Macy decode_embedded_bp_compressed(bp, dstbuf);
146e2df9bb4SMartin Matuska abd_t cabd, dabd;
147e2df9bb4SMartin Matuska abd_get_from_buf_struct(&cabd, dstbuf, psize);
148e2df9bb4SMartin Matuska abd_get_from_buf_struct(&dabd, buf, buflen);
149e2df9bb4SMartin Matuska VERIFY0(zio_decompress_data(BP_GET_COMPRESS(bp), &cabd,
150e2df9bb4SMartin Matuska &dabd, psize, buflen, NULL));
151e2df9bb4SMartin Matuska abd_free(&dabd);
152e2df9bb4SMartin Matuska abd_free(&cabd);
153eda14cbcSMatt Macy } else {
154eda14cbcSMatt Macy ASSERT3U(lsize, ==, psize);
155eda14cbcSMatt Macy decode_embedded_bp_compressed(bp, buf);
156eda14cbcSMatt Macy }
157eda14cbcSMatt Macy
158eda14cbcSMatt Macy return (0);
159eda14cbcSMatt Macy }
160