1*eda14cbcSMatt Macy /* 2*eda14cbcSMatt Macy * CDDL HEADER START 3*eda14cbcSMatt Macy * 4*eda14cbcSMatt Macy * The contents of this file are subject to the terms of the 5*eda14cbcSMatt Macy * Common Development and Distribution License (the "License"). 6*eda14cbcSMatt Macy * You may not use this file except in compliance with the License. 7*eda14cbcSMatt Macy * 8*eda14cbcSMatt Macy * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*eda14cbcSMatt Macy * or http://www.opensolaris.org/os/licensing. 10*eda14cbcSMatt Macy * See the License for the specific language governing permissions 11*eda14cbcSMatt Macy * and limitations under the License. 12*eda14cbcSMatt Macy * 13*eda14cbcSMatt Macy * When distributing Covered Code, include this CDDL HEADER in each 14*eda14cbcSMatt Macy * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*eda14cbcSMatt Macy * If applicable, add the following below this CDDL HEADER, with the 16*eda14cbcSMatt Macy * fields enclosed by brackets "[]" replaced with your own identifying 17*eda14cbcSMatt Macy * information: Portions Copyright [yyyy] [name of copyright owner] 18*eda14cbcSMatt Macy * 19*eda14cbcSMatt Macy * CDDL HEADER END 20*eda14cbcSMatt Macy */ 21*eda14cbcSMatt Macy 22*eda14cbcSMatt Macy /* 23*eda14cbcSMatt Macy * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. 24*eda14cbcSMatt Macy */ 25*eda14cbcSMatt Macy 26*eda14cbcSMatt Macy /* 27*eda14cbcSMatt Macy * We keep our own copy of this algorithm for 3 main reasons: 28*eda14cbcSMatt Macy * 1. If we didn't, anyone modifying common/os/compress.c would 29*eda14cbcSMatt Macy * directly break our on disk format 30*eda14cbcSMatt Macy * 2. Our version of lzjb does not have a number of checks that the 31*eda14cbcSMatt Macy * common/os version needs and uses 32*eda14cbcSMatt Macy * 3. We initialize the lempel to ensure deterministic results, 33*eda14cbcSMatt Macy * so that identical blocks can always be deduplicated. 34*eda14cbcSMatt Macy * In particular, we are adding the "feature" that compress() can 35*eda14cbcSMatt Macy * take a destination buffer size and returns the compressed length, or the 36*eda14cbcSMatt Macy * source length if compression would overflow the destination buffer. 37*eda14cbcSMatt Macy */ 38*eda14cbcSMatt Macy 39*eda14cbcSMatt Macy #include <sys/zfs_context.h> 40*eda14cbcSMatt Macy #include <sys/zio_compress.h> 41*eda14cbcSMatt Macy 42*eda14cbcSMatt Macy #define MATCH_BITS 6 43*eda14cbcSMatt Macy #define MATCH_MIN 3 44*eda14cbcSMatt Macy #define MATCH_MAX ((1 << MATCH_BITS) + (MATCH_MIN - 1)) 45*eda14cbcSMatt Macy #define OFFSET_MASK ((1 << (16 - MATCH_BITS)) - 1) 46*eda14cbcSMatt Macy #define LEMPEL_SIZE 1024 47*eda14cbcSMatt Macy 48*eda14cbcSMatt Macy /*ARGSUSED*/ 49*eda14cbcSMatt Macy size_t 50*eda14cbcSMatt Macy lzjb_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 51*eda14cbcSMatt Macy { 52*eda14cbcSMatt Macy uchar_t *src = s_start; 53*eda14cbcSMatt Macy uchar_t *dst = d_start; 54*eda14cbcSMatt Macy uchar_t *cpy; 55*eda14cbcSMatt Macy uchar_t *copymap = NULL; 56*eda14cbcSMatt Macy int copymask = 1 << (NBBY - 1); 57*eda14cbcSMatt Macy int mlen, offset, hash; 58*eda14cbcSMatt Macy uint16_t *hp; 59*eda14cbcSMatt Macy uint16_t *lempel; 60*eda14cbcSMatt Macy 61*eda14cbcSMatt Macy lempel = kmem_zalloc(LEMPEL_SIZE * sizeof (uint16_t), KM_SLEEP); 62*eda14cbcSMatt Macy while (src < (uchar_t *)s_start + s_len) { 63*eda14cbcSMatt Macy if ((copymask <<= 1) == (1 << NBBY)) { 64*eda14cbcSMatt Macy if (dst >= (uchar_t *)d_start + d_len - 1 - 2 * NBBY) { 65*eda14cbcSMatt Macy kmem_free(lempel, 66*eda14cbcSMatt Macy LEMPEL_SIZE*sizeof (uint16_t)); 67*eda14cbcSMatt Macy return (s_len); 68*eda14cbcSMatt Macy } 69*eda14cbcSMatt Macy copymask = 1; 70*eda14cbcSMatt Macy copymap = dst; 71*eda14cbcSMatt Macy *dst++ = 0; 72*eda14cbcSMatt Macy } 73*eda14cbcSMatt Macy if (src > (uchar_t *)s_start + s_len - MATCH_MAX) { 74*eda14cbcSMatt Macy *dst++ = *src++; 75*eda14cbcSMatt Macy continue; 76*eda14cbcSMatt Macy } 77*eda14cbcSMatt Macy hash = (src[0] << 16) + (src[1] << 8) + src[2]; 78*eda14cbcSMatt Macy hash += hash >> 9; 79*eda14cbcSMatt Macy hash += hash >> 5; 80*eda14cbcSMatt Macy hp = &lempel[hash & (LEMPEL_SIZE - 1)]; 81*eda14cbcSMatt Macy offset = (intptr_t)(src - *hp) & OFFSET_MASK; 82*eda14cbcSMatt Macy *hp = (uint16_t)(uintptr_t)src; 83*eda14cbcSMatt Macy cpy = src - offset; 84*eda14cbcSMatt Macy if (cpy >= (uchar_t *)s_start && cpy != src && 85*eda14cbcSMatt Macy src[0] == cpy[0] && src[1] == cpy[1] && src[2] == cpy[2]) { 86*eda14cbcSMatt Macy *copymap |= copymask; 87*eda14cbcSMatt Macy for (mlen = MATCH_MIN; mlen < MATCH_MAX; mlen++) 88*eda14cbcSMatt Macy if (src[mlen] != cpy[mlen]) 89*eda14cbcSMatt Macy break; 90*eda14cbcSMatt Macy *dst++ = ((mlen - MATCH_MIN) << (NBBY - MATCH_BITS)) | 91*eda14cbcSMatt Macy (offset >> NBBY); 92*eda14cbcSMatt Macy *dst++ = (uchar_t)offset; 93*eda14cbcSMatt Macy src += mlen; 94*eda14cbcSMatt Macy } else { 95*eda14cbcSMatt Macy *dst++ = *src++; 96*eda14cbcSMatt Macy } 97*eda14cbcSMatt Macy } 98*eda14cbcSMatt Macy 99*eda14cbcSMatt Macy kmem_free(lempel, LEMPEL_SIZE * sizeof (uint16_t)); 100*eda14cbcSMatt Macy return (dst - (uchar_t *)d_start); 101*eda14cbcSMatt Macy } 102*eda14cbcSMatt Macy 103*eda14cbcSMatt Macy /*ARGSUSED*/ 104*eda14cbcSMatt Macy int 105*eda14cbcSMatt Macy lzjb_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 106*eda14cbcSMatt Macy { 107*eda14cbcSMatt Macy uchar_t *src = s_start; 108*eda14cbcSMatt Macy uchar_t *dst = d_start; 109*eda14cbcSMatt Macy uchar_t *d_end = (uchar_t *)d_start + d_len; 110*eda14cbcSMatt Macy uchar_t *cpy; 111*eda14cbcSMatt Macy uchar_t copymap = 0; 112*eda14cbcSMatt Macy int copymask = 1 << (NBBY - 1); 113*eda14cbcSMatt Macy 114*eda14cbcSMatt Macy while (dst < d_end) { 115*eda14cbcSMatt Macy if ((copymask <<= 1) == (1 << NBBY)) { 116*eda14cbcSMatt Macy copymask = 1; 117*eda14cbcSMatt Macy copymap = *src++; 118*eda14cbcSMatt Macy } 119*eda14cbcSMatt Macy if (copymap & copymask) { 120*eda14cbcSMatt Macy int mlen = (src[0] >> (NBBY - MATCH_BITS)) + MATCH_MIN; 121*eda14cbcSMatt Macy int offset = ((src[0] << NBBY) | src[1]) & OFFSET_MASK; 122*eda14cbcSMatt Macy src += 2; 123*eda14cbcSMatt Macy if ((cpy = dst - offset) < (uchar_t *)d_start) 124*eda14cbcSMatt Macy return (-1); 125*eda14cbcSMatt Macy while (--mlen >= 0 && dst < d_end) 126*eda14cbcSMatt Macy *dst++ = *cpy++; 127*eda14cbcSMatt Macy } else { 128*eda14cbcSMatt Macy *dst++ = *src++; 129*eda14cbcSMatt Macy } 130*eda14cbcSMatt Macy } 131*eda14cbcSMatt Macy return (0); 132*eda14cbcSMatt Macy } 133