xref: /linux/fs/btrfs/zlib.c (revision b8bb76713ec50df2f11efee386e16f93d51e1076)
1 /*
2  * Copyright (C) 2008 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  *
18  * Based on jffs2 zlib code:
19  * Copyright © 2001-2007 Red Hat, Inc.
20  * Created by David Woodhouse <dwmw2@infradead.org>
21  */
22 
23 #include <linux/kernel.h>
24 #include <linux/slab.h>
25 #include <linux/zlib.h>
26 #include <linux/zutil.h>
27 #include <linux/vmalloc.h>
28 #include <linux/init.h>
29 #include <linux/err.h>
30 #include <linux/sched.h>
31 #include <linux/pagemap.h>
32 #include <linux/bio.h>
33 #include "compression.h"
34 
35 /* Plan: call deflate() with avail_in == *sourcelen,
36 	avail_out = *dstlen - 12 and flush == Z_FINISH.
37 	If it doesn't manage to finish,	call it again with
38 	avail_in == 0 and avail_out set to the remaining 12
39 	bytes for it to clean up.
40    Q: Is 12 bytes sufficient?
41 */
42 #define STREAM_END_SPACE 12
43 
44 struct workspace {
45 	z_stream inf_strm;
46 	z_stream def_strm;
47 	char *buf;
48 	struct list_head list;
49 };
50 
51 static LIST_HEAD(idle_workspace);
52 static DEFINE_SPINLOCK(workspace_lock);
53 static unsigned long num_workspace;
54 static atomic_t alloc_workspace = ATOMIC_INIT(0);
55 static DECLARE_WAIT_QUEUE_HEAD(workspace_wait);
56 
57 /*
58  * this finds an available zlib workspace or allocates a new one
59  * NULL or an ERR_PTR is returned if things go bad.
60  */
61 static struct workspace *find_zlib_workspace(void)
62 {
63 	struct workspace *workspace;
64 	int ret;
65 	int cpus = num_online_cpus();
66 
67 again:
68 	spin_lock(&workspace_lock);
69 	if (!list_empty(&idle_workspace)) {
70 		workspace = list_entry(idle_workspace.next, struct workspace,
71 				       list);
72 		list_del(&workspace->list);
73 		num_workspace--;
74 		spin_unlock(&workspace_lock);
75 		return workspace;
76 
77 	}
78 	spin_unlock(&workspace_lock);
79 	if (atomic_read(&alloc_workspace) > cpus) {
80 		DEFINE_WAIT(wait);
81 		prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE);
82 		if (atomic_read(&alloc_workspace) > cpus)
83 			schedule();
84 		finish_wait(&workspace_wait, &wait);
85 		goto again;
86 	}
87 	atomic_inc(&alloc_workspace);
88 	workspace = kzalloc(sizeof(*workspace), GFP_NOFS);
89 	if (!workspace) {
90 		ret = -ENOMEM;
91 		goto fail;
92 	}
93 
94 	workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize());
95 	if (!workspace->def_strm.workspace) {
96 		ret = -ENOMEM;
97 		goto fail;
98 	}
99 	workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize());
100 	if (!workspace->inf_strm.workspace) {
101 		ret = -ENOMEM;
102 		goto fail_inflate;
103 	}
104 	workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
105 	if (!workspace->buf) {
106 		ret = -ENOMEM;
107 		goto fail_kmalloc;
108 	}
109 	return workspace;
110 
111 fail_kmalloc:
112 	vfree(workspace->inf_strm.workspace);
113 fail_inflate:
114 	vfree(workspace->def_strm.workspace);
115 fail:
116 	kfree(workspace);
117 	atomic_dec(&alloc_workspace);
118 	wake_up(&workspace_wait);
119 	return ERR_PTR(ret);
120 }
121 
122 /*
123  * put a workspace struct back on the list or free it if we have enough
124  * idle ones sitting around
125  */
126 static int free_workspace(struct workspace *workspace)
127 {
128 	spin_lock(&workspace_lock);
129 	if (num_workspace < num_online_cpus()) {
130 		list_add_tail(&workspace->list, &idle_workspace);
131 		num_workspace++;
132 		spin_unlock(&workspace_lock);
133 		if (waitqueue_active(&workspace_wait))
134 			wake_up(&workspace_wait);
135 		return 0;
136 	}
137 	spin_unlock(&workspace_lock);
138 	vfree(workspace->def_strm.workspace);
139 	vfree(workspace->inf_strm.workspace);
140 	kfree(workspace->buf);
141 	kfree(workspace);
142 
143 	atomic_dec(&alloc_workspace);
144 	if (waitqueue_active(&workspace_wait))
145 		wake_up(&workspace_wait);
146 	return 0;
147 }
148 
149 /*
150  * cleanup function for module exit
151  */
152 static void free_workspaces(void)
153 {
154 	struct workspace *workspace;
155 	while (!list_empty(&idle_workspace)) {
156 		workspace = list_entry(idle_workspace.next, struct workspace,
157 				       list);
158 		list_del(&workspace->list);
159 		vfree(workspace->def_strm.workspace);
160 		vfree(workspace->inf_strm.workspace);
161 		kfree(workspace->buf);
162 		kfree(workspace);
163 		atomic_dec(&alloc_workspace);
164 	}
165 }
166 
167 /*
168  * given an address space and start/len, compress the bytes.
169  *
170  * pages are allocated to hold the compressed result and stored
171  * in 'pages'
172  *
173  * out_pages is used to return the number of pages allocated.  There
174  * may be pages allocated even if we return an error
175  *
176  * total_in is used to return the number of bytes actually read.  It
177  * may be smaller then len if we had to exit early because we
178  * ran out of room in the pages array or because we cross the
179  * max_out threshold.
180  *
181  * total_out is used to return the total number of compressed bytes
182  *
183  * max_out tells us the max number of bytes that we're allowed to
184  * stuff into pages
185  */
186 int btrfs_zlib_compress_pages(struct address_space *mapping,
187 			      u64 start, unsigned long len,
188 			      struct page **pages,
189 			      unsigned long nr_dest_pages,
190 			      unsigned long *out_pages,
191 			      unsigned long *total_in,
192 			      unsigned long *total_out,
193 			      unsigned long max_out)
194 {
195 	int ret;
196 	struct workspace *workspace;
197 	char *data_in;
198 	char *cpage_out;
199 	int nr_pages = 0;
200 	struct page *in_page = NULL;
201 	struct page *out_page = NULL;
202 	int out_written = 0;
203 	int in_read = 0;
204 	unsigned long bytes_left;
205 
206 	*out_pages = 0;
207 	*total_out = 0;
208 	*total_in = 0;
209 
210 	workspace = find_zlib_workspace();
211 	if (!workspace)
212 		return -1;
213 
214 	if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) {
215 		printk(KERN_WARNING "deflateInit failed\n");
216 		ret = -1;
217 		goto out;
218 	}
219 
220 	workspace->def_strm.total_in = 0;
221 	workspace->def_strm.total_out = 0;
222 
223 	in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
224 	data_in = kmap(in_page);
225 
226 	out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
227 	cpage_out = kmap(out_page);
228 	pages[0] = out_page;
229 	nr_pages = 1;
230 
231 	workspace->def_strm.next_in = data_in;
232 	workspace->def_strm.next_out = cpage_out;
233 	workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
234 	workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE);
235 
236 	out_written = 0;
237 	in_read = 0;
238 
239 	while (workspace->def_strm.total_in < len) {
240 		ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH);
241 		if (ret != Z_OK) {
242 			printk(KERN_DEBUG "btrfs deflate in loop returned %d\n",
243 			       ret);
244 			zlib_deflateEnd(&workspace->def_strm);
245 			ret = -1;
246 			goto out;
247 		}
248 
249 		/* we're making it bigger, give up */
250 		if (workspace->def_strm.total_in > 8192 &&
251 		    workspace->def_strm.total_in <
252 		    workspace->def_strm.total_out) {
253 			ret = -1;
254 			goto out;
255 		}
256 		/* we need another page for writing out.  Test this
257 		 * before the total_in so we will pull in a new page for
258 		 * the stream end if required
259 		 */
260 		if (workspace->def_strm.avail_out == 0) {
261 			kunmap(out_page);
262 			if (nr_pages == nr_dest_pages) {
263 				out_page = NULL;
264 				ret = -1;
265 				goto out;
266 			}
267 			out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
268 			cpage_out = kmap(out_page);
269 			pages[nr_pages] = out_page;
270 			nr_pages++;
271 			workspace->def_strm.avail_out = PAGE_CACHE_SIZE;
272 			workspace->def_strm.next_out = cpage_out;
273 		}
274 		/* we're all done */
275 		if (workspace->def_strm.total_in >= len)
276 			break;
277 
278 		/* we've read in a full page, get a new one */
279 		if (workspace->def_strm.avail_in == 0) {
280 			if (workspace->def_strm.total_out > max_out)
281 				break;
282 
283 			bytes_left = len - workspace->def_strm.total_in;
284 			kunmap(in_page);
285 			page_cache_release(in_page);
286 
287 			start += PAGE_CACHE_SIZE;
288 			in_page = find_get_page(mapping,
289 						start >> PAGE_CACHE_SHIFT);
290 			data_in = kmap(in_page);
291 			workspace->def_strm.avail_in = min(bytes_left,
292 							   PAGE_CACHE_SIZE);
293 			workspace->def_strm.next_in = data_in;
294 		}
295 	}
296 	workspace->def_strm.avail_in = 0;
297 	ret = zlib_deflate(&workspace->def_strm, Z_FINISH);
298 	zlib_deflateEnd(&workspace->def_strm);
299 
300 	if (ret != Z_STREAM_END) {
301 		ret = -1;
302 		goto out;
303 	}
304 
305 	if (workspace->def_strm.total_out >= workspace->def_strm.total_in) {
306 		ret = -1;
307 		goto out;
308 	}
309 
310 	ret = 0;
311 	*total_out = workspace->def_strm.total_out;
312 	*total_in = workspace->def_strm.total_in;
313 out:
314 	*out_pages = nr_pages;
315 	if (out_page)
316 		kunmap(out_page);
317 
318 	if (in_page) {
319 		kunmap(in_page);
320 		page_cache_release(in_page);
321 	}
322 	free_workspace(workspace);
323 	return ret;
324 }
325 
326 /*
327  * pages_in is an array of pages with compressed data.
328  *
329  * disk_start is the starting logical offset of this array in the file
330  *
331  * bvec is a bio_vec of pages from the file that we want to decompress into
332  *
333  * vcnt is the count of pages in the biovec
334  *
335  * srclen is the number of bytes in pages_in
336  *
337  * The basic idea is that we have a bio that was created by readpages.
338  * The pages in the bio are for the uncompressed data, and they may not
339  * be contiguous.  They all correspond to the range of bytes covered by
340  * the compressed extent.
341  */
342 int btrfs_zlib_decompress_biovec(struct page **pages_in,
343 			      u64 disk_start,
344 			      struct bio_vec *bvec,
345 			      int vcnt,
346 			      size_t srclen)
347 {
348 	int ret = 0;
349 	int wbits = MAX_WBITS;
350 	struct workspace *workspace;
351 	char *data_in;
352 	size_t total_out = 0;
353 	unsigned long page_bytes_left;
354 	unsigned long page_in_index = 0;
355 	unsigned long page_out_index = 0;
356 	struct page *page_out;
357 	unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) /
358 					PAGE_CACHE_SIZE;
359 	unsigned long buf_start;
360 	unsigned long buf_offset;
361 	unsigned long bytes;
362 	unsigned long working_bytes;
363 	unsigned long pg_offset;
364 	unsigned long start_byte;
365 	unsigned long current_buf_start;
366 	char *kaddr;
367 
368 	workspace = find_zlib_workspace();
369 	if (!workspace)
370 		return -ENOMEM;
371 
372 	data_in = kmap(pages_in[page_in_index]);
373 	workspace->inf_strm.next_in = data_in;
374 	workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
375 	workspace->inf_strm.total_in = 0;
376 
377 	workspace->inf_strm.total_out = 0;
378 	workspace->inf_strm.next_out = workspace->buf;
379 	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
380 	page_out = bvec[page_out_index].bv_page;
381 	page_bytes_left = PAGE_CACHE_SIZE;
382 	pg_offset = 0;
383 
384 	/* If it's deflate, and it's got no preset dictionary, then
385 	   we can tell zlib to skip the adler32 check. */
386 	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
387 	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
388 	    !(((data_in[0]<<8) + data_in[1]) % 31)) {
389 
390 		wbits = -((data_in[0] >> 4) + 8);
391 		workspace->inf_strm.next_in += 2;
392 		workspace->inf_strm.avail_in -= 2;
393 	}
394 
395 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
396 		printk(KERN_WARNING "inflateInit failed\n");
397 		ret = -1;
398 		goto out;
399 	}
400 	while (workspace->inf_strm.total_in < srclen) {
401 		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
402 		if (ret != Z_OK && ret != Z_STREAM_END)
403 			break;
404 		/*
405 		 * buf start is the byte offset we're of the start of
406 		 * our workspace buffer
407 		 */
408 		buf_start = total_out;
409 
410 		/* total_out is the last byte of the workspace buffer */
411 		total_out = workspace->inf_strm.total_out;
412 
413 		working_bytes = total_out - buf_start;
414 
415 		/*
416 		 * start byte is the first byte of the page we're currently
417 		 * copying into relative to the start of the compressed data.
418 		 */
419 		start_byte = page_offset(page_out) - disk_start;
420 
421 		if (working_bytes == 0) {
422 			/* we didn't make progress in this inflate
423 			 * call, we're done
424 			 */
425 			if (ret != Z_STREAM_END)
426 				ret = -1;
427 			break;
428 		}
429 
430 		/* we haven't yet hit data corresponding to this page */
431 		if (total_out <= start_byte)
432 			goto next;
433 
434 		/*
435 		 * the start of the data we care about is offset into
436 		 * the middle of our working buffer
437 		 */
438 		if (total_out > start_byte && buf_start < start_byte) {
439 			buf_offset = start_byte - buf_start;
440 			working_bytes -= buf_offset;
441 		} else {
442 			buf_offset = 0;
443 		}
444 		current_buf_start = buf_start;
445 
446 		/* copy bytes from the working buffer into the pages */
447 		while (working_bytes > 0) {
448 			bytes = min(PAGE_CACHE_SIZE - pg_offset,
449 				    PAGE_CACHE_SIZE - buf_offset);
450 			bytes = min(bytes, working_bytes);
451 			kaddr = kmap_atomic(page_out, KM_USER0);
452 			memcpy(kaddr + pg_offset, workspace->buf + buf_offset,
453 			       bytes);
454 			kunmap_atomic(kaddr, KM_USER0);
455 			flush_dcache_page(page_out);
456 
457 			pg_offset += bytes;
458 			page_bytes_left -= bytes;
459 			buf_offset += bytes;
460 			working_bytes -= bytes;
461 			current_buf_start += bytes;
462 
463 			/* check if we need to pick another page */
464 			if (page_bytes_left == 0) {
465 				page_out_index++;
466 				if (page_out_index >= vcnt) {
467 					ret = 0;
468 					goto done;
469 				}
470 
471 				page_out = bvec[page_out_index].bv_page;
472 				pg_offset = 0;
473 				page_bytes_left = PAGE_CACHE_SIZE;
474 				start_byte = page_offset(page_out) - disk_start;
475 
476 				/*
477 				 * make sure our new page is covered by this
478 				 * working buffer
479 				 */
480 				if (total_out <= start_byte)
481 					goto next;
482 
483 				/* the next page in the biovec might not
484 				 * be adjacent to the last page, but it
485 				 * might still be found inside this working
486 				 * buffer.  bump our offset pointer
487 				 */
488 				if (total_out > start_byte &&
489 				    current_buf_start < start_byte) {
490 					buf_offset = start_byte - buf_start;
491 					working_bytes = total_out - start_byte;
492 					current_buf_start = buf_start +
493 						buf_offset;
494 				}
495 			}
496 		}
497 next:
498 		workspace->inf_strm.next_out = workspace->buf;
499 		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
500 
501 		if (workspace->inf_strm.avail_in == 0) {
502 			unsigned long tmp;
503 			kunmap(pages_in[page_in_index]);
504 			page_in_index++;
505 			if (page_in_index >= total_pages_in) {
506 				data_in = NULL;
507 				break;
508 			}
509 			data_in = kmap(pages_in[page_in_index]);
510 			workspace->inf_strm.next_in = data_in;
511 			tmp = srclen - workspace->inf_strm.total_in;
512 			workspace->inf_strm.avail_in = min(tmp,
513 							   PAGE_CACHE_SIZE);
514 		}
515 	}
516 	if (ret != Z_STREAM_END)
517 		ret = -1;
518 	else
519 		ret = 0;
520 done:
521 	zlib_inflateEnd(&workspace->inf_strm);
522 	if (data_in)
523 		kunmap(pages_in[page_in_index]);
524 out:
525 	free_workspace(workspace);
526 	return ret;
527 }
528 
529 /*
530  * a less complex decompression routine.  Our compressed data fits in a
531  * single page, and we want to read a single page out of it.
532  * start_byte tells us the offset into the compressed data we're interested in
533  */
534 int btrfs_zlib_decompress(unsigned char *data_in,
535 			  struct page *dest_page,
536 			  unsigned long start_byte,
537 			  size_t srclen, size_t destlen)
538 {
539 	int ret = 0;
540 	int wbits = MAX_WBITS;
541 	struct workspace *workspace;
542 	unsigned long bytes_left = destlen;
543 	unsigned long total_out = 0;
544 	char *kaddr;
545 
546 	if (destlen > PAGE_CACHE_SIZE)
547 		return -ENOMEM;
548 
549 	workspace = find_zlib_workspace();
550 	if (!workspace)
551 		return -ENOMEM;
552 
553 	workspace->inf_strm.next_in = data_in;
554 	workspace->inf_strm.avail_in = srclen;
555 	workspace->inf_strm.total_in = 0;
556 
557 	workspace->inf_strm.next_out = workspace->buf;
558 	workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
559 	workspace->inf_strm.total_out = 0;
560 	/* If it's deflate, and it's got no preset dictionary, then
561 	   we can tell zlib to skip the adler32 check. */
562 	if (srclen > 2 && !(data_in[1] & PRESET_DICT) &&
563 	    ((data_in[0] & 0x0f) == Z_DEFLATED) &&
564 	    !(((data_in[0]<<8) + data_in[1]) % 31)) {
565 
566 		wbits = -((data_in[0] >> 4) + 8);
567 		workspace->inf_strm.next_in += 2;
568 		workspace->inf_strm.avail_in -= 2;
569 	}
570 
571 	if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) {
572 		printk(KERN_WARNING "inflateInit failed\n");
573 		ret = -1;
574 		goto out;
575 	}
576 
577 	while (bytes_left > 0) {
578 		unsigned long buf_start;
579 		unsigned long buf_offset;
580 		unsigned long bytes;
581 		unsigned long pg_offset = 0;
582 
583 		ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH);
584 		if (ret != Z_OK && ret != Z_STREAM_END)
585 			break;
586 
587 		buf_start = total_out;
588 		total_out = workspace->inf_strm.total_out;
589 
590 		if (total_out == buf_start) {
591 			ret = -1;
592 			break;
593 		}
594 
595 		if (total_out <= start_byte)
596 			goto next;
597 
598 		if (total_out > start_byte && buf_start < start_byte)
599 			buf_offset = start_byte - buf_start;
600 		else
601 			buf_offset = 0;
602 
603 		bytes = min(PAGE_CACHE_SIZE - pg_offset,
604 			    PAGE_CACHE_SIZE - buf_offset);
605 		bytes = min(bytes, bytes_left);
606 
607 		kaddr = kmap_atomic(dest_page, KM_USER0);
608 		memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
609 		kunmap_atomic(kaddr, KM_USER0);
610 
611 		pg_offset += bytes;
612 		bytes_left -= bytes;
613 next:
614 		workspace->inf_strm.next_out = workspace->buf;
615 		workspace->inf_strm.avail_out = PAGE_CACHE_SIZE;
616 	}
617 
618 	if (ret != Z_STREAM_END && bytes_left != 0)
619 		ret = -1;
620 	else
621 		ret = 0;
622 
623 	zlib_inflateEnd(&workspace->inf_strm);
624 out:
625 	free_workspace(workspace);
626 	return ret;
627 }
628 
629 void btrfs_zlib_exit(void)
630 {
631     free_workspaces();
632 }
633