xref: /freebsd/sbin/hastd/hast_compression.c (revision 4fbb9c43aa44d9145151bb5f77d302ba01fb7551)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 Pawel Jakub Dawidek <pawel@dawidek.net>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 #include <sys/endian.h>
31 
32 #include <errno.h>
33 #include <string.h>
34 #include <strings.h>
35 
36 #include <hast.h>
37 #include <lzf.h>
38 #include <nv.h>
39 #include <pjdlog.h>
40 
41 #include "hast_compression.h"
42 
/*
 * Report whether the given buffer consists solely of zero bytes.
 *
 * The buffer is examined as an array of 64-bit words, so 'size' (in bytes)
 * must be a multiple of sizeof(uint64_t); this is asserted.
 * NOTE(review): 'data' is dereferenced as uint64_t, so callers presumably
 * pass suitably aligned buffers - confirm.
 *
 * Returns true if every word is zero (an empty buffer trivially qualifies),
 * false otherwise.
 */
static bool
allzeros(const void *data, size_t size)
{
	const uint64_t *p = data;
	size_t i, nwords;
	uint64_t v;

	PJDLOG_ASSERT((size % sizeof(*p)) == 0);

	nwords = size / sizeof(*p);

	/*
	 * An empty buffer has no words to probe; bail out before the
	 * p[0] and p[nwords - 1] accesses below would read out of bounds.
	 */
	if (nwords == 0)
		return (true);

	/*
	 * This is the fastest method I found for checking if the given
	 * buffer contains all zeros.
	 * Because inside the loop we don't check at every step, we would
	 * get an answer only after walking through the entire buffer.
	 * To return early if the buffer doesn't contain all zeros, we probe
	 * 8 bytes at the beginning, in the middle and at the end of the buffer
	 * first.
	 */
	if ((p[0] | p[nwords >> 1] | p[nwords - 1]) != 0)
		return (false);

	/* The index has the same width as the bound, so it cannot wrap. */
	v = 0;
	for (i = 0; i < nwords; i++)
		v |= p[i];
	return (v == 0);
}
70 
/*
 * "Hole" compression: replace a buffer that contains only zero bytes with
 * its length, encoded as a 32-bit little-endian integer.
 *
 * On success returns a newly malloc(3)ed buffer of sizeof(uint32_t) bytes,
 * which the caller must free, and stores that size in *sizep.
 * Returns NULL and leaves *sizep untouched when the data is not compressed:
 * the buffer is empty, its length does not fit in 32 bits, it contains a
 * non-zero byte, or memory could not be allocated.
 */
static void *
hast_hole_compress(const unsigned char *data, size_t *sizep)
{
	uint32_t size;
	void *newbuf;

	/*
	 * The length travels as 32 bits.  Refuse a length that would be
	 * truncated, because the decompressor would then recreate a shorter
	 * buffer.  An empty buffer is skipped as well: the header alone would
	 * be larger than the data.
	 */
	size = (uint32_t)*sizep;
	if (*sizep == 0 || (size_t)size != *sizep)
		return (NULL);

	if (!allzeros(data, *sizep))
		return (NULL);

	newbuf = malloc(sizeof(size));
	if (newbuf == NULL) {
		/* Report the size of the allocation that failed. */
		pjdlog_warning("Unable to compress (no memory: %zu).",
		    sizeof(size));
		return (NULL);
	}
	size = htole32(size);
	bcopy(&size, newbuf, sizeof(size));
	*sizep = sizeof(size);

	return (newbuf);
}
92 
/*
 * Undo "hole" compression.
 *
 * The input must be exactly one 32-bit little-endian integer giving the
 * length of the original all-zeros buffer.  On success a freshly allocated,
 * zero-filled buffer of that length is returned (to be freed by the caller)
 * and *sizep is updated to its length.  On a malformed input or allocation
 * failure an error is logged, NULL is returned and *sizep is left alone.
 */
static void *
hast_hole_decompress(const unsigned char *data, size_t *sizep)
{
	uint32_t holesize;
	void *zeros;

	/* Anything other than a bare 32-bit length is not ours to decode. */
	if (*sizep != sizeof(holesize)) {
		pjdlog_error("Unable to decompress (invalid size: %zu).",
		    *sizep);
		return (NULL);
	}

	/* Copy instead of casting: 'data' carries no alignment guarantee here. */
	memcpy(&holesize, data, sizeof(holesize));
	holesize = le32toh(holesize);

	zeros = malloc(holesize);
	if (zeros == NULL) {
		pjdlog_error("Unable to decompress (no memory: %zu).",
		    (size_t)holesize);
		return (NULL);
	}
	memset(zeros, 0, holesize);

	*sizep = holesize;
	return (zeros);
}
119 
/*
 * Minimum block size to try to compress.
 * hast_lzf_compress() does not attempt to compress buffers of this size or
 * smaller, and subtracts this value from the input size when sizing its
 * output buffer.  hast_lzf_decompress() expects the original size recorded
 * in the header to be larger than this value.
 */
#define	HAST_LZF_COMPRESS_MIN	1024
122 
123 static void *
124 hast_lzf_compress(const unsigned char *data, size_t *sizep)
125 {
126 	unsigned char *newbuf;
127 	uint32_t origsize;
128 	size_t newsize;
129 
130 	origsize = *sizep;
131 
132 	if (origsize <= HAST_LZF_COMPRESS_MIN)
133 		return (NULL);
134 
135 	newsize = sizeof(origsize) + origsize - HAST_LZF_COMPRESS_MIN;
136 	newbuf = malloc(newsize);
137 	if (newbuf == NULL) {
138 		pjdlog_warning("Unable to compress (no memory: %zu).",
139 		    newsize);
140 		return (NULL);
141 	}
142 	newsize = lzf_compress(data, *sizep, newbuf + sizeof(origsize),
143 	    newsize - sizeof(origsize));
144 	if (newsize == 0) {
145 		free(newbuf);
146 		return (NULL);
147 	}
148 	origsize = htole32(origsize);
149 	bcopy(&origsize, newbuf, sizeof(origsize));
150 
151 	*sizep = sizeof(origsize) + newsize;
152 	return (newbuf);
153 }
154 
155 static void *
156 hast_lzf_decompress(const unsigned char *data, size_t *sizep)
157 {
158 	unsigned char *newbuf;
159 	uint32_t origsize;
160 	size_t newsize;
161 
162 	PJDLOG_ASSERT(*sizep > sizeof(origsize));
163 
164 	bcopy(data, &origsize, sizeof(origsize));
165 	origsize = le32toh(origsize);
166 	PJDLOG_ASSERT(origsize > HAST_LZF_COMPRESS_MIN);
167 
168 	newbuf = malloc(origsize);
169 	if (newbuf == NULL) {
170 		pjdlog_error("Unable to decompress (no memory: %zu).",
171 		    (size_t)origsize);
172 		return (NULL);
173 	}
174 	newsize = lzf_decompress(data + sizeof(origsize),
175 	    *sizep - sizeof(origsize), newbuf, origsize);
176 	if (newsize == 0) {
177 		free(newbuf);
178 		pjdlog_error("Unable to decompress.");
179 		return (NULL);
180 	}
181 	PJDLOG_ASSERT(newsize == origsize);
182 
183 	*sizep = newsize;
184 	return (newbuf);
185 }
186 
187 const char *
188 compression_name(int num)
189 {
190 
191 	switch (num) {
192 	case HAST_COMPRESSION_NONE:
193 		return ("none");
194 	case HAST_COMPRESSION_HOLE:
195 		return ("hole");
196 	case HAST_COMPRESSION_LZF:
197 		return ("lzf");
198 	}
199 	return ("unknown");
200 }
201 
202 int
203 compression_send(const struct hast_resource *res, struct nv *nv, void **datap,
204     size_t *sizep, bool *freedatap)
205 {
206 	unsigned char *newbuf;
207 	int compression;
208 	size_t size;
209 
210 	size = *sizep;
211 	compression = res->hr_compression;
212 
213 	switch (compression) {
214 	case HAST_COMPRESSION_NONE:
215 		return (0);
216 	case HAST_COMPRESSION_HOLE:
217 		newbuf = hast_hole_compress(*datap, &size);
218 		break;
219 	case HAST_COMPRESSION_LZF:
220 		/* Try 'hole' compression first. */
221 		newbuf = hast_hole_compress(*datap, &size);
222 		if (newbuf != NULL)
223 			compression = HAST_COMPRESSION_HOLE;
224 		else
225 			newbuf = hast_lzf_compress(*datap, &size);
226 		break;
227 	default:
228 		PJDLOG_ABORT("Invalid compression: %d.", res->hr_compression);
229 	}
230 
231 	if (newbuf == NULL) {
232 		/* Unable to compress the data. */
233 		return (0);
234 	}
235 	nv_add_string(nv, compression_name(compression), "compression");
236 	if (nv_error(nv) != 0) {
237 		free(newbuf);
238 		errno = nv_error(nv);
239 		return (-1);
240 	}
241 	if (*freedatap)
242 		free(*datap);
243 	*freedatap = true;
244 	*datap = newbuf;
245 	*sizep = size;
246 
247 	return (0);
248 }
249 
250 int
251 compression_recv(const struct hast_resource *res __unused, struct nv *nv,
252     void **datap, size_t *sizep, bool *freedatap)
253 {
254 	unsigned char *newbuf;
255 	const char *algo;
256 	size_t size;
257 
258 	algo = nv_get_string(nv, "compression");
259 	if (algo == NULL)
260 		return (0);	/* No compression. */
261 
262 	newbuf = NULL;
263 	size = *sizep;
264 
265 	if (strcmp(algo, "hole") == 0)
266 		newbuf = hast_hole_decompress(*datap, &size);
267 	else if (strcmp(algo, "lzf") == 0)
268 		newbuf = hast_lzf_decompress(*datap, &size);
269 	else {
270 		pjdlog_error("Unknown compression algorithm '%s'.", algo);
271 		return (-1);	/* Unknown compression algorithm. */
272 	}
273 
274 	if (newbuf == NULL)
275 		return (-1);
276 	if (*freedatap)
277 		free(*datap);
278 	*freedatap = true;
279 	*datap = newbuf;
280 	*sizep = size;
281 
282 	return (0);
283 }
284