1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28 /* All Rights Reserved */ 29 30 #pragma ident "%Z%%M% %I% %E% SMI" 31 32 33 #include <unistd.h> 34 #include <stdlib.h> 35 #include <stdio.h> 36 37 #define BYTE 8 38 #define QW 1 /* width of bas-q digit in bits */ 39 40 /* 41 * this stuff should be local and hidden; it was made 42 * accessible outside for dirty reasons: 20% faster spell 43 */ 44 #include "huff.h" 45 struct huff huffcode; 46 47 /* 48 * Infinite Huffman code 49 * 50 * Let the messages be exponentially distributed with ratio r: 51 * P {message k} = r^k*(1-r), k = 0, 1, ... 52 * Let the messages be coded in base q, and suppose 53 * r^n = 1/q 54 * If each decade(base q) contains n codes, then 55 * the messages assigned to each decade will be q times 56 * as probable as the next. Moreover the code for the tail of 57 * the distribution after truncating one decade should look 58 * just like the original, but longer by one leading digit q-1. 59 * q(z+n) = z + (q-1)q^w 60 * where z is first code of decade, w is width of code, in shortest 61 * full decade. Examples, base 2: 62 * r^1 = 1/2 r^5 = 1/2 63 * 0 0110 64 * 10 0111 65 * 110 1000 66 * 1110 1001 67 * ... 1010 68 * 10110 69 * w = 1, z = 0 w = 4, z = 0110 70 * Rewriting slightly 71 * (q-1)z + q*n = (q-1)q^w 72 * whence z is a multiple of q and n is a multiple of q-1. Let 73 * z = cq, n = d(q-1) 74 * We pick w to be the least integer such that 75 * d = n/(q-1) <= q^(w-1) 76 * Then solve for c 77 * c = q^(w-1) - d 78 * If c is not zero, the first decade may be preceded by 79 * even shorter (w-1)-digit codes 0, 1, ..., c-1. Thus 80 * the example code with r^5 = 1/2 becomes 81 * 000 82 * 001 83 * 010 84 * 0110 85 * 0111 86 * 1000 87 * 1001 88 * 1010 89 * 10110 90 * ... 91 * 110110 92 * ... 93 * The expected number of base-q digits in a codeword is then 94 * w - 1 + r^c/(1-r^n) 95 * The present routines require q to be a power of 2 96 */ 97 /* 98 * There is a lot of hanky-panky with left justification against 99 * sign instead of simple left justification because 100 * unsigned long is not available 101 */ 102 #define L (BYTE*(sizeof (long))-1) /* length of signless long */ 103 #define MASK (~((unsigned long)1<<L)) /* mask out sign */ 104 105 /* 106 * decode the prefix of word y (which is left justified against sign) 107 * place mesage number into place pointed to by kp 108 * return length (in bits) of decoded prefix or 0 if code is out of 109 * range 110 */ 111 int 112 decode(long y, long *pk) 113 { 114 int l; 115 long v; 116 if (y < cs) { 117 *pk = y >> (long)(L+QW-w); 118 return (w-QW); 119 } 120 for (l = w, v = v0; y >= qcs; 121 y = ((unsigned long)y << QW) & MASK, v += n) 122 if ((l += QW) > L) 123 return (0); 124 *pk = v + (y>>(long)(L-w)); 125 return (l); 126 } 127 128 /* 129 * encode message k and put result (right justified) into 130 * place pointed to by py. 131 * return length (in bits) of result, 132 * or 0 if code is too long 133 */ 134 135 int 136 encode(long k, long *py) 137 { 138 int l; 139 long y; 140 if (k < c) { 141 *py = k; 142 return (w-QW); 143 } 144 for (k -= c, y = 1, l = w; k >= n; k -= n, y <<= QW) 145 if ((l += QW) > L) 146 return (0); 147 *py = ((y-1)<<w) + cq + k; 148 return (l); 149 } 150 151 152 /* 153 * Initialization code, given expected value of k 154 * E(k) = r/(1-r) = a 155 * and given base width b 156 * return expected length of coded messages 157 */ 158 static struct qlog { 159 long p; 160 double u; 161 } z; 162 163 static struct qlog 164 qlog(double x, double y, long p, double u) /* find smallest p so x^p<=y */ 165 { 166 167 if (u/x <= y) { 168 z.p = 0; 169 z.u = 1; 170 } else { 171 z = qlog(x, y, p+p, u*u); 172 if (u*z.u/x > y) { 173 z.p += p; 174 z.u *= u; 175 } 176 } 177 return (z); 178 } 179 180 double 181 huff(float a) 182 { 183 int i, q; 184 long d, j; 185 double r = a/(1.0 + a); 186 double rc, rq; 187 188 for (i = 0, q = 1, rq = r; i < QW; i++, q *= 2, rq *= rq) 189 continue; 190 rq /= r; /* rq = r^(q-1) */ 191 (void) qlog(rq, 1./q, 1L, rq); 192 d = z.p; 193 n = d*(q-1); 194 if (n != d * (q - 1)) 195 abort(); /* time to make n long */ 196 for (w = QW, j = 1; j < d; w += QW, j *= q) 197 continue; 198 c = j - d; 199 cq = c*q; 200 cs = cq<<(L-w); 201 qcs = (((long)(q-1)<<w) + cq) << (L-QW-w); 202 v0 = c - cq; 203 for (i = 0, rc = 1; i < c; i++, rc *= r) /* rc = r^c */ 204 continue; 205 return (w + QW*(rc/(1-z.u) - 1)); 206 } 207 208 void 209 whuff(void) 210 { 211 (void) fwrite((char *) & huffcode, sizeof (huffcode), 1, stdout); 212 } 213 214 int 215 rhuff(FILE *f) 216 { 217 return (read(fileno(f), (char *)&huffcode, sizeof (huffcode)) == 218 sizeof (huffcode)); 219 } 220