1 /* 2 * Copyright (C) 2008 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 * 18 * Based on jffs2 zlib code: 19 * Copyright © 2001-2007 Red Hat, Inc. 20 * Created by David Woodhouse <dwmw2@infradead.org> 21 */ 22 23 #include <linux/kernel.h> 24 #include <linux/slab.h> 25 #include <linux/zlib.h> 26 #include <linux/zutil.h> 27 #include <linux/vmalloc.h> 28 #include <linux/init.h> 29 #include <linux/err.h> 30 #include <linux/sched.h> 31 #include <linux/pagemap.h> 32 #include <linux/bio.h> 33 #include "compression.h" 34 35 /* Plan: call deflate() with avail_in == *sourcelen, 36 avail_out = *dstlen - 12 and flush == Z_FINISH. 37 If it doesn't manage to finish, call it again with 38 avail_in == 0 and avail_out set to the remaining 12 39 bytes for it to clean up. 40 Q: Is 12 bytes sufficient? 41 */ 42 #define STREAM_END_SPACE 12 43 44 struct workspace { 45 z_stream inf_strm; 46 z_stream def_strm; 47 char *buf; 48 struct list_head list; 49 }; 50 51 static LIST_HEAD(idle_workspace); 52 static DEFINE_SPINLOCK(workspace_lock); 53 static unsigned long num_workspace; 54 static atomic_t alloc_workspace = ATOMIC_INIT(0); 55 static DECLARE_WAIT_QUEUE_HEAD(workspace_wait); 56 57 /* 58 * this finds an available zlib workspace or allocates a new one 59 * NULL or an ERR_PTR is returned if things go bad. 60 */ 61 static struct workspace *find_zlib_workspace(void) 62 { 63 struct workspace *workspace; 64 int ret; 65 int cpus = num_online_cpus(); 66 67 again: 68 spin_lock(&workspace_lock); 69 if (!list_empty(&idle_workspace)) { 70 workspace = list_entry(idle_workspace.next, struct workspace, 71 list); 72 list_del(&workspace->list); 73 num_workspace--; 74 spin_unlock(&workspace_lock); 75 return workspace; 76 77 } 78 spin_unlock(&workspace_lock); 79 if (atomic_read(&alloc_workspace) > cpus) { 80 DEFINE_WAIT(wait); 81 prepare_to_wait(&workspace_wait, &wait, TASK_UNINTERRUPTIBLE); 82 if (atomic_read(&alloc_workspace) > cpus) 83 schedule(); 84 finish_wait(&workspace_wait, &wait); 85 goto again; 86 } 87 atomic_inc(&alloc_workspace); 88 workspace = kzalloc(sizeof(*workspace), GFP_NOFS); 89 if (!workspace) { 90 ret = -ENOMEM; 91 goto fail; 92 } 93 94 workspace->def_strm.workspace = vmalloc(zlib_deflate_workspacesize()); 95 if (!workspace->def_strm.workspace) { 96 ret = -ENOMEM; 97 goto fail; 98 } 99 workspace->inf_strm.workspace = vmalloc(zlib_inflate_workspacesize()); 100 if (!workspace->inf_strm.workspace) { 101 ret = -ENOMEM; 102 goto fail_inflate; 103 } 104 workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS); 105 if (!workspace->buf) { 106 ret = -ENOMEM; 107 goto fail_kmalloc; 108 } 109 return workspace; 110 111 fail_kmalloc: 112 vfree(workspace->inf_strm.workspace); 113 fail_inflate: 114 vfree(workspace->def_strm.workspace); 115 fail: 116 kfree(workspace); 117 atomic_dec(&alloc_workspace); 118 wake_up(&workspace_wait); 119 return ERR_PTR(ret); 120 } 121 122 /* 123 * put a workspace struct back on the list or free it if we have enough 124 * idle ones sitting around 125 */ 126 static int free_workspace(struct workspace *workspace) 127 { 128 spin_lock(&workspace_lock); 129 if (num_workspace < num_online_cpus()) { 130 list_add_tail(&workspace->list, &idle_workspace); 131 num_workspace++; 132 spin_unlock(&workspace_lock); 133 if (waitqueue_active(&workspace_wait)) 134 wake_up(&workspace_wait); 135 return 0; 136 } 137 spin_unlock(&workspace_lock); 138 vfree(workspace->def_strm.workspace); 139 vfree(workspace->inf_strm.workspace); 140 kfree(workspace->buf); 141 kfree(workspace); 142 143 atomic_dec(&alloc_workspace); 144 if (waitqueue_active(&workspace_wait)) 145 wake_up(&workspace_wait); 146 return 0; 147 } 148 149 /* 150 * cleanup function for module exit 151 */ 152 static void free_workspaces(void) 153 { 154 struct workspace *workspace; 155 while (!list_empty(&idle_workspace)) { 156 workspace = list_entry(idle_workspace.next, struct workspace, 157 list); 158 list_del(&workspace->list); 159 vfree(workspace->def_strm.workspace); 160 vfree(workspace->inf_strm.workspace); 161 kfree(workspace->buf); 162 kfree(workspace); 163 atomic_dec(&alloc_workspace); 164 } 165 } 166 167 /* 168 * given an address space and start/len, compress the bytes. 169 * 170 * pages are allocated to hold the compressed result and stored 171 * in 'pages' 172 * 173 * out_pages is used to return the number of pages allocated. There 174 * may be pages allocated even if we return an error 175 * 176 * total_in is used to return the number of bytes actually read. It 177 * may be smaller then len if we had to exit early because we 178 * ran out of room in the pages array or because we cross the 179 * max_out threshold. 180 * 181 * total_out is used to return the total number of compressed bytes 182 * 183 * max_out tells us the max number of bytes that we're allowed to 184 * stuff into pages 185 */ 186 int btrfs_zlib_compress_pages(struct address_space *mapping, 187 u64 start, unsigned long len, 188 struct page **pages, 189 unsigned long nr_dest_pages, 190 unsigned long *out_pages, 191 unsigned long *total_in, 192 unsigned long *total_out, 193 unsigned long max_out) 194 { 195 int ret; 196 struct workspace *workspace; 197 char *data_in; 198 char *cpage_out; 199 int nr_pages = 0; 200 struct page *in_page = NULL; 201 struct page *out_page = NULL; 202 unsigned long bytes_left; 203 204 *out_pages = 0; 205 *total_out = 0; 206 *total_in = 0; 207 208 workspace = find_zlib_workspace(); 209 if (IS_ERR(workspace)) 210 return -1; 211 212 if (Z_OK != zlib_deflateInit(&workspace->def_strm, 3)) { 213 printk(KERN_WARNING "deflateInit failed\n"); 214 ret = -1; 215 goto out; 216 } 217 218 workspace->def_strm.total_in = 0; 219 workspace->def_strm.total_out = 0; 220 221 in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT); 222 data_in = kmap(in_page); 223 224 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 225 cpage_out = kmap(out_page); 226 pages[0] = out_page; 227 nr_pages = 1; 228 229 workspace->def_strm.next_in = data_in; 230 workspace->def_strm.next_out = cpage_out; 231 workspace->def_strm.avail_out = PAGE_CACHE_SIZE; 232 workspace->def_strm.avail_in = min(len, PAGE_CACHE_SIZE); 233 234 while (workspace->def_strm.total_in < len) { 235 ret = zlib_deflate(&workspace->def_strm, Z_SYNC_FLUSH); 236 if (ret != Z_OK) { 237 printk(KERN_DEBUG "btrfs deflate in loop returned %d\n", 238 ret); 239 zlib_deflateEnd(&workspace->def_strm); 240 ret = -1; 241 goto out; 242 } 243 244 /* we're making it bigger, give up */ 245 if (workspace->def_strm.total_in > 8192 && 246 workspace->def_strm.total_in < 247 workspace->def_strm.total_out) { 248 ret = -1; 249 goto out; 250 } 251 /* we need another page for writing out. Test this 252 * before the total_in so we will pull in a new page for 253 * the stream end if required 254 */ 255 if (workspace->def_strm.avail_out == 0) { 256 kunmap(out_page); 257 if (nr_pages == nr_dest_pages) { 258 out_page = NULL; 259 ret = -1; 260 goto out; 261 } 262 out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM); 263 cpage_out = kmap(out_page); 264 pages[nr_pages] = out_page; 265 nr_pages++; 266 workspace->def_strm.avail_out = PAGE_CACHE_SIZE; 267 workspace->def_strm.next_out = cpage_out; 268 } 269 /* we're all done */ 270 if (workspace->def_strm.total_in >= len) 271 break; 272 273 /* we've read in a full page, get a new one */ 274 if (workspace->def_strm.avail_in == 0) { 275 if (workspace->def_strm.total_out > max_out) 276 break; 277 278 bytes_left = len - workspace->def_strm.total_in; 279 kunmap(in_page); 280 page_cache_release(in_page); 281 282 start += PAGE_CACHE_SIZE; 283 in_page = find_get_page(mapping, 284 start >> PAGE_CACHE_SHIFT); 285 data_in = kmap(in_page); 286 workspace->def_strm.avail_in = min(bytes_left, 287 PAGE_CACHE_SIZE); 288 workspace->def_strm.next_in = data_in; 289 } 290 } 291 workspace->def_strm.avail_in = 0; 292 ret = zlib_deflate(&workspace->def_strm, Z_FINISH); 293 zlib_deflateEnd(&workspace->def_strm); 294 295 if (ret != Z_STREAM_END) { 296 ret = -1; 297 goto out; 298 } 299 300 if (workspace->def_strm.total_out >= workspace->def_strm.total_in) { 301 ret = -1; 302 goto out; 303 } 304 305 ret = 0; 306 *total_out = workspace->def_strm.total_out; 307 *total_in = workspace->def_strm.total_in; 308 out: 309 *out_pages = nr_pages; 310 if (out_page) 311 kunmap(out_page); 312 313 if (in_page) { 314 kunmap(in_page); 315 page_cache_release(in_page); 316 } 317 free_workspace(workspace); 318 return ret; 319 } 320 321 /* 322 * pages_in is an array of pages with compressed data. 323 * 324 * disk_start is the starting logical offset of this array in the file 325 * 326 * bvec is a bio_vec of pages from the file that we want to decompress into 327 * 328 * vcnt is the count of pages in the biovec 329 * 330 * srclen is the number of bytes in pages_in 331 * 332 * The basic idea is that we have a bio that was created by readpages. 333 * The pages in the bio are for the uncompressed data, and they may not 334 * be contiguous. They all correspond to the range of bytes covered by 335 * the compressed extent. 336 */ 337 int btrfs_zlib_decompress_biovec(struct page **pages_in, 338 u64 disk_start, 339 struct bio_vec *bvec, 340 int vcnt, 341 size_t srclen) 342 { 343 int ret = 0; 344 int wbits = MAX_WBITS; 345 struct workspace *workspace; 346 char *data_in; 347 size_t total_out = 0; 348 unsigned long page_bytes_left; 349 unsigned long page_in_index = 0; 350 unsigned long page_out_index = 0; 351 struct page *page_out; 352 unsigned long total_pages_in = (srclen + PAGE_CACHE_SIZE - 1) / 353 PAGE_CACHE_SIZE; 354 unsigned long buf_start; 355 unsigned long buf_offset; 356 unsigned long bytes; 357 unsigned long working_bytes; 358 unsigned long pg_offset; 359 unsigned long start_byte; 360 unsigned long current_buf_start; 361 char *kaddr; 362 363 workspace = find_zlib_workspace(); 364 if (IS_ERR(workspace)) 365 return -ENOMEM; 366 367 data_in = kmap(pages_in[page_in_index]); 368 workspace->inf_strm.next_in = data_in; 369 workspace->inf_strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE); 370 workspace->inf_strm.total_in = 0; 371 372 workspace->inf_strm.total_out = 0; 373 workspace->inf_strm.next_out = workspace->buf; 374 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; 375 page_out = bvec[page_out_index].bv_page; 376 page_bytes_left = PAGE_CACHE_SIZE; 377 pg_offset = 0; 378 379 /* If it's deflate, and it's got no preset dictionary, then 380 we can tell zlib to skip the adler32 check. */ 381 if (srclen > 2 && !(data_in[1] & PRESET_DICT) && 382 ((data_in[0] & 0x0f) == Z_DEFLATED) && 383 !(((data_in[0]<<8) + data_in[1]) % 31)) { 384 385 wbits = -((data_in[0] >> 4) + 8); 386 workspace->inf_strm.next_in += 2; 387 workspace->inf_strm.avail_in -= 2; 388 } 389 390 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { 391 printk(KERN_WARNING "inflateInit failed\n"); 392 ret = -1; 393 goto out; 394 } 395 while (workspace->inf_strm.total_in < srclen) { 396 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); 397 if (ret != Z_OK && ret != Z_STREAM_END) 398 break; 399 /* 400 * buf start is the byte offset we're of the start of 401 * our workspace buffer 402 */ 403 buf_start = total_out; 404 405 /* total_out is the last byte of the workspace buffer */ 406 total_out = workspace->inf_strm.total_out; 407 408 working_bytes = total_out - buf_start; 409 410 /* 411 * start byte is the first byte of the page we're currently 412 * copying into relative to the start of the compressed data. 413 */ 414 start_byte = page_offset(page_out) - disk_start; 415 416 if (working_bytes == 0) { 417 /* we didn't make progress in this inflate 418 * call, we're done 419 */ 420 if (ret != Z_STREAM_END) 421 ret = -1; 422 break; 423 } 424 425 /* we haven't yet hit data corresponding to this page */ 426 if (total_out <= start_byte) 427 goto next; 428 429 /* 430 * the start of the data we care about is offset into 431 * the middle of our working buffer 432 */ 433 if (total_out > start_byte && buf_start < start_byte) { 434 buf_offset = start_byte - buf_start; 435 working_bytes -= buf_offset; 436 } else { 437 buf_offset = 0; 438 } 439 current_buf_start = buf_start; 440 441 /* copy bytes from the working buffer into the pages */ 442 while (working_bytes > 0) { 443 bytes = min(PAGE_CACHE_SIZE - pg_offset, 444 PAGE_CACHE_SIZE - buf_offset); 445 bytes = min(bytes, working_bytes); 446 kaddr = kmap_atomic(page_out, KM_USER0); 447 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, 448 bytes); 449 kunmap_atomic(kaddr, KM_USER0); 450 flush_dcache_page(page_out); 451 452 pg_offset += bytes; 453 page_bytes_left -= bytes; 454 buf_offset += bytes; 455 working_bytes -= bytes; 456 current_buf_start += bytes; 457 458 /* check if we need to pick another page */ 459 if (page_bytes_left == 0) { 460 page_out_index++; 461 if (page_out_index >= vcnt) { 462 ret = 0; 463 goto done; 464 } 465 466 page_out = bvec[page_out_index].bv_page; 467 pg_offset = 0; 468 page_bytes_left = PAGE_CACHE_SIZE; 469 start_byte = page_offset(page_out) - disk_start; 470 471 /* 472 * make sure our new page is covered by this 473 * working buffer 474 */ 475 if (total_out <= start_byte) 476 goto next; 477 478 /* the next page in the biovec might not 479 * be adjacent to the last page, but it 480 * might still be found inside this working 481 * buffer. bump our offset pointer 482 */ 483 if (total_out > start_byte && 484 current_buf_start < start_byte) { 485 buf_offset = start_byte - buf_start; 486 working_bytes = total_out - start_byte; 487 current_buf_start = buf_start + 488 buf_offset; 489 } 490 } 491 } 492 next: 493 workspace->inf_strm.next_out = workspace->buf; 494 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; 495 496 if (workspace->inf_strm.avail_in == 0) { 497 unsigned long tmp; 498 kunmap(pages_in[page_in_index]); 499 page_in_index++; 500 if (page_in_index >= total_pages_in) { 501 data_in = NULL; 502 break; 503 } 504 data_in = kmap(pages_in[page_in_index]); 505 workspace->inf_strm.next_in = data_in; 506 tmp = srclen - workspace->inf_strm.total_in; 507 workspace->inf_strm.avail_in = min(tmp, 508 PAGE_CACHE_SIZE); 509 } 510 } 511 if (ret != Z_STREAM_END) 512 ret = -1; 513 else 514 ret = 0; 515 done: 516 zlib_inflateEnd(&workspace->inf_strm); 517 if (data_in) 518 kunmap(pages_in[page_in_index]); 519 out: 520 free_workspace(workspace); 521 return ret; 522 } 523 524 /* 525 * a less complex decompression routine. Our compressed data fits in a 526 * single page, and we want to read a single page out of it. 527 * start_byte tells us the offset into the compressed data we're interested in 528 */ 529 int btrfs_zlib_decompress(unsigned char *data_in, 530 struct page *dest_page, 531 unsigned long start_byte, 532 size_t srclen, size_t destlen) 533 { 534 int ret = 0; 535 int wbits = MAX_WBITS; 536 struct workspace *workspace; 537 unsigned long bytes_left = destlen; 538 unsigned long total_out = 0; 539 char *kaddr; 540 541 if (destlen > PAGE_CACHE_SIZE) 542 return -ENOMEM; 543 544 workspace = find_zlib_workspace(); 545 if (IS_ERR(workspace)) 546 return -ENOMEM; 547 548 workspace->inf_strm.next_in = data_in; 549 workspace->inf_strm.avail_in = srclen; 550 workspace->inf_strm.total_in = 0; 551 552 workspace->inf_strm.next_out = workspace->buf; 553 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; 554 workspace->inf_strm.total_out = 0; 555 /* If it's deflate, and it's got no preset dictionary, then 556 we can tell zlib to skip the adler32 check. */ 557 if (srclen > 2 && !(data_in[1] & PRESET_DICT) && 558 ((data_in[0] & 0x0f) == Z_DEFLATED) && 559 !(((data_in[0]<<8) + data_in[1]) % 31)) { 560 561 wbits = -((data_in[0] >> 4) + 8); 562 workspace->inf_strm.next_in += 2; 563 workspace->inf_strm.avail_in -= 2; 564 } 565 566 if (Z_OK != zlib_inflateInit2(&workspace->inf_strm, wbits)) { 567 printk(KERN_WARNING "inflateInit failed\n"); 568 ret = -1; 569 goto out; 570 } 571 572 while (bytes_left > 0) { 573 unsigned long buf_start; 574 unsigned long buf_offset; 575 unsigned long bytes; 576 unsigned long pg_offset = 0; 577 578 ret = zlib_inflate(&workspace->inf_strm, Z_NO_FLUSH); 579 if (ret != Z_OK && ret != Z_STREAM_END) 580 break; 581 582 buf_start = total_out; 583 total_out = workspace->inf_strm.total_out; 584 585 if (total_out == buf_start) { 586 ret = -1; 587 break; 588 } 589 590 if (total_out <= start_byte) 591 goto next; 592 593 if (total_out > start_byte && buf_start < start_byte) 594 buf_offset = start_byte - buf_start; 595 else 596 buf_offset = 0; 597 598 bytes = min(PAGE_CACHE_SIZE - pg_offset, 599 PAGE_CACHE_SIZE - buf_offset); 600 bytes = min(bytes, bytes_left); 601 602 kaddr = kmap_atomic(dest_page, KM_USER0); 603 memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes); 604 kunmap_atomic(kaddr, KM_USER0); 605 606 pg_offset += bytes; 607 bytes_left -= bytes; 608 next: 609 workspace->inf_strm.next_out = workspace->buf; 610 workspace->inf_strm.avail_out = PAGE_CACHE_SIZE; 611 } 612 613 if (ret != Z_STREAM_END && bytes_left != 0) 614 ret = -1; 615 else 616 ret = 0; 617 618 zlib_inflateEnd(&workspace->inf_strm); 619 out: 620 free_workspace(workspace); 621 return ret; 622 } 623 624 void btrfs_zlib_exit(void) 625 { 626 free_workspaces(); 627 } 628