1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #include <unistd.h> 31 #include <stdlib.h> 32 #include <stdio.h> 33 34 #define BYTE 8 35 #define QW 1 /* width of bas-q digit in bits */ 36 37 /* 38 * this stuff should be local and hidden; it was made 39 * accessible outside for dirty reasons: 20% faster spell 40 */ 41 #include "huff.h" 42 struct huff huffcode; 43 44 /* 45 * Infinite Huffman code 46 * 47 * Let the messages be exponentially distributed with ratio r: 48 * P {message k} = r^k*(1-r), k = 0, 1, ... 49 * Let the messages be coded in base q, and suppose 50 * r^n = 1/q 51 * If each decade(base q) contains n codes, then 52 * the messages assigned to each decade will be q times 53 * as probable as the next. Moreover the code for the tail of 54 * the distribution after truncating one decade should look 55 * just like the original, but longer by one leading digit q-1. 56 * q(z+n) = z + (q-1)q^w 57 * where z is first code of decade, w is width of code, in shortest 58 * full decade. Examples, base 2: 59 * r^1 = 1/2 r^5 = 1/2 60 * 0 0110 61 * 10 0111 62 * 110 1000 63 * 1110 1001 64 * ... 1010 65 * 10110 66 * w = 1, z = 0 w = 4, z = 0110 67 * Rewriting slightly 68 * (q-1)z + q*n = (q-1)q^w 69 * whence z is a multiple of q and n is a multiple of q-1. Let 70 * z = cq, n = d(q-1) 71 * We pick w to be the least integer such that 72 * d = n/(q-1) <= q^(w-1) 73 * Then solve for c 74 * c = q^(w-1) - d 75 * If c is not zero, the first decade may be preceded by 76 * even shorter (w-1)-digit codes 0, 1, ..., c-1. Thus 77 * the example code with r^5 = 1/2 becomes 78 * 000 79 * 001 80 * 010 81 * 0110 82 * 0111 83 * 1000 84 * 1001 85 * 1010 86 * 10110 87 * ... 88 * 110110 89 * ... 90 * The expected number of base-q digits in a codeword is then 91 * w - 1 + r^c/(1-r^n) 92 * The present routines require q to be a power of 2 93 */ 94 /* 95 * There is a lot of hanky-panky with left justification against 96 * sign instead of simple left justification because 97 * unsigned long is not available 98 */ 99 #define L (BYTE*(sizeof (long))-1) /* length of signless long */ 100 #define MASK (~((unsigned long)1<<L)) /* mask out sign */ 101 102 /* 103 * decode the prefix of word y (which is left justified against sign) 104 * place mesage number into place pointed to by kp 105 * return length (in bits) of decoded prefix or 0 if code is out of 106 * range 107 */ 108 int 109 decode(long y, long *pk) 110 { 111 int l; 112 long v; 113 if (y < cs) { 114 *pk = y >> (long)(L+QW-w); 115 return (w-QW); 116 } 117 for (l = w, v = v0; y >= qcs; 118 y = ((unsigned long)y << QW) & MASK, v += n) 119 if ((l += QW) > L) 120 return (0); 121 *pk = v + (y>>(long)(L-w)); 122 return (l); 123 } 124 125 /* 126 * encode message k and put result (right justified) into 127 * place pointed to by py. 128 * return length (in bits) of result, 129 * or 0 if code is too long 130 */ 131 132 int 133 encode(long k, long *py) 134 { 135 int l; 136 long y; 137 if (k < c) { 138 *py = k; 139 return (w-QW); 140 } 141 for (k -= c, y = 1, l = w; k >= n; k -= n, y <<= QW) 142 if ((l += QW) > L) 143 return (0); 144 *py = ((y-1)<<w) + cq + k; 145 return (l); 146 } 147 148 149 /* 150 * Initialization code, given expected value of k 151 * E(k) = r/(1-r) = a 152 * and given base width b 153 * return expected length of coded messages 154 */ 155 static struct qlog { 156 long p; 157 double u; 158 } z; 159 160 static struct qlog 161 qlog(double x, double y, long p, double u) /* find smallest p so x^p<=y */ 162 { 163 164 if (u/x <= y) { 165 z.p = 0; 166 z.u = 1; 167 } else { 168 z = qlog(x, y, p+p, u*u); 169 if (u*z.u/x > y) { 170 z.p += p; 171 z.u *= u; 172 } 173 } 174 return (z); 175 } 176 177 double 178 huff(float a) 179 { 180 int i, q; 181 long d, j; 182 double r = a/(1.0 + a); 183 double rc, rq; 184 185 for (i = 0, q = 1, rq = r; i < QW; i++, q *= 2, rq *= rq) 186 continue; 187 rq /= r; /* rq = r^(q-1) */ 188 (void) qlog(rq, 1./q, 1L, rq); 189 d = z.p; 190 n = d*(q-1); 191 if (n != d * (q - 1)) 192 abort(); /* time to make n long */ 193 for (w = QW, j = 1; j < d; w += QW, j *= q) 194 continue; 195 c = j - d; 196 cq = c*q; 197 cs = cq<<(L-w); 198 qcs = (((long)(q-1)<<w) + cq) << (L-QW-w); 199 v0 = c - cq; 200 for (i = 0, rc = 1; i < c; i++, rc *= r) /* rc = r^c */ 201 continue; 202 return (w + QW*(rc/(1-z.u) - 1)); 203 } 204 205 void 206 whuff(void) 207 { 208 (void) fwrite((char *) & huffcode, sizeof (huffcode), 1, stdout); 209 } 210 211 int 212 rhuff(FILE *f) 213 { 214 return (read(fileno(f), (char *)&huffcode, sizeof (huffcode)) == 215 sizeof (huffcode)); 216 } 217