1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 1994 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 25*7c478bd9Sstevel@tonic-gate */ 26*7c478bd9Sstevel@tonic-gate 27*7c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 28*7c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 29*7c478bd9Sstevel@tonic-gate 30*7c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 31*7c478bd9Sstevel@tonic-gate 32*7c478bd9Sstevel@tonic-gate 33*7c478bd9Sstevel@tonic-gate #include <unistd.h> 34*7c478bd9Sstevel@tonic-gate #include <stdlib.h> 35*7c478bd9Sstevel@tonic-gate #include <stdio.h> 36*7c478bd9Sstevel@tonic-gate 37*7c478bd9Sstevel@tonic-gate #define BYTE 8 38*7c478bd9Sstevel@tonic-gate #define QW 1 /* width of bas-q digit in bits */ 39*7c478bd9Sstevel@tonic-gate 40*7c478bd9Sstevel@tonic-gate /* 41*7c478bd9Sstevel@tonic-gate * this stuff should be local and hidden; it was made 42*7c478bd9Sstevel@tonic-gate * accessible outside for dirty reasons: 20% faster spell 43*7c478bd9Sstevel@tonic-gate */ 44*7c478bd9Sstevel@tonic-gate #include "huff.h" 45*7c478bd9Sstevel@tonic-gate struct huff huffcode; 46*7c478bd9Sstevel@tonic-gate 47*7c478bd9Sstevel@tonic-gate /* 48*7c478bd9Sstevel@tonic-gate * Infinite Huffman code 49*7c478bd9Sstevel@tonic-gate * 50*7c478bd9Sstevel@tonic-gate * Let the messages be exponentially distributed with ratio r: 51*7c478bd9Sstevel@tonic-gate * P {message k} = r^k*(1-r), k = 0, 1, ... 52*7c478bd9Sstevel@tonic-gate * Let the messages be coded in base q, and suppose 53*7c478bd9Sstevel@tonic-gate * r^n = 1/q 54*7c478bd9Sstevel@tonic-gate * If each decade(base q) contains n codes, then 55*7c478bd9Sstevel@tonic-gate * the messages assigned to each decade will be q times 56*7c478bd9Sstevel@tonic-gate * as probable as the next. Moreover the code for the tail of 57*7c478bd9Sstevel@tonic-gate * the distribution after truncating one decade should look 58*7c478bd9Sstevel@tonic-gate * just like the original, but longer by one leading digit q-1. 59*7c478bd9Sstevel@tonic-gate * q(z+n) = z + (q-1)q^w 60*7c478bd9Sstevel@tonic-gate * where z is first code of decade, w is width of code, in shortest 61*7c478bd9Sstevel@tonic-gate * full decade. Examples, base 2: 62*7c478bd9Sstevel@tonic-gate * r^1 = 1/2 r^5 = 1/2 63*7c478bd9Sstevel@tonic-gate * 0 0110 64*7c478bd9Sstevel@tonic-gate * 10 0111 65*7c478bd9Sstevel@tonic-gate * 110 1000 66*7c478bd9Sstevel@tonic-gate * 1110 1001 67*7c478bd9Sstevel@tonic-gate * ... 1010 68*7c478bd9Sstevel@tonic-gate * 10110 69*7c478bd9Sstevel@tonic-gate * w = 1, z = 0 w = 4, z = 0110 70*7c478bd9Sstevel@tonic-gate * Rewriting slightly 71*7c478bd9Sstevel@tonic-gate * (q-1)z + q*n = (q-1)q^w 72*7c478bd9Sstevel@tonic-gate * whence z is a multiple of q and n is a multiple of q-1. Let 73*7c478bd9Sstevel@tonic-gate * z = cq, n = d(q-1) 74*7c478bd9Sstevel@tonic-gate * We pick w to be the least integer such that 75*7c478bd9Sstevel@tonic-gate * d = n/(q-1) <= q^(w-1) 76*7c478bd9Sstevel@tonic-gate * Then solve for c 77*7c478bd9Sstevel@tonic-gate * c = q^(w-1) - d 78*7c478bd9Sstevel@tonic-gate * If c is not zero, the first decade may be preceded by 79*7c478bd9Sstevel@tonic-gate * even shorter (w-1)-digit codes 0, 1, ..., c-1. Thus 80*7c478bd9Sstevel@tonic-gate * the example code with r^5 = 1/2 becomes 81*7c478bd9Sstevel@tonic-gate * 000 82*7c478bd9Sstevel@tonic-gate * 001 83*7c478bd9Sstevel@tonic-gate * 010 84*7c478bd9Sstevel@tonic-gate * 0110 85*7c478bd9Sstevel@tonic-gate * 0111 86*7c478bd9Sstevel@tonic-gate * 1000 87*7c478bd9Sstevel@tonic-gate * 1001 88*7c478bd9Sstevel@tonic-gate * 1010 89*7c478bd9Sstevel@tonic-gate * 10110 90*7c478bd9Sstevel@tonic-gate * ... 91*7c478bd9Sstevel@tonic-gate * 110110 92*7c478bd9Sstevel@tonic-gate * ... 93*7c478bd9Sstevel@tonic-gate * The expected number of base-q digits in a codeword is then 94*7c478bd9Sstevel@tonic-gate * w - 1 + r^c/(1-r^n) 95*7c478bd9Sstevel@tonic-gate * The present routines require q to be a power of 2 96*7c478bd9Sstevel@tonic-gate */ 97*7c478bd9Sstevel@tonic-gate /* 98*7c478bd9Sstevel@tonic-gate * There is a lot of hanky-panky with left justification against 99*7c478bd9Sstevel@tonic-gate * sign instead of simple left justification because 100*7c478bd9Sstevel@tonic-gate * unsigned long is not available 101*7c478bd9Sstevel@tonic-gate */ 102*7c478bd9Sstevel@tonic-gate #define L (BYTE*(sizeof (long))-1) /* length of signless long */ 103*7c478bd9Sstevel@tonic-gate #define MASK (~((unsigned long)1<<L)) /* mask out sign */ 104*7c478bd9Sstevel@tonic-gate 105*7c478bd9Sstevel@tonic-gate /* 106*7c478bd9Sstevel@tonic-gate * decode the prefix of word y (which is left justified against sign) 107*7c478bd9Sstevel@tonic-gate * place mesage number into place pointed to by kp 108*7c478bd9Sstevel@tonic-gate * return length (in bits) of decoded prefix or 0 if code is out of 109*7c478bd9Sstevel@tonic-gate * range 110*7c478bd9Sstevel@tonic-gate */ 111*7c478bd9Sstevel@tonic-gate int 112*7c478bd9Sstevel@tonic-gate decode(long y, long *pk) 113*7c478bd9Sstevel@tonic-gate { 114*7c478bd9Sstevel@tonic-gate register l; 115*7c478bd9Sstevel@tonic-gate long v; 116*7c478bd9Sstevel@tonic-gate if (y < cs) { 117*7c478bd9Sstevel@tonic-gate *pk = y >> (long)(L+QW-w); 118*7c478bd9Sstevel@tonic-gate return (w-QW); 119*7c478bd9Sstevel@tonic-gate } 120*7c478bd9Sstevel@tonic-gate for (l = w, v = v0; y >= qcs; 121*7c478bd9Sstevel@tonic-gate y = ((unsigned long)y << QW) & MASK, v += n) 122*7c478bd9Sstevel@tonic-gate if ((l += QW) > L) 123*7c478bd9Sstevel@tonic-gate return (0); 124*7c478bd9Sstevel@tonic-gate *pk = v + (y>>(long)(L-w)); 125*7c478bd9Sstevel@tonic-gate return (l); 126*7c478bd9Sstevel@tonic-gate } 127*7c478bd9Sstevel@tonic-gate 128*7c478bd9Sstevel@tonic-gate /* 129*7c478bd9Sstevel@tonic-gate * encode message k and put result (right justified) into 130*7c478bd9Sstevel@tonic-gate * place pointed to by py. 131*7c478bd9Sstevel@tonic-gate * return length (in bits) of result, 132*7c478bd9Sstevel@tonic-gate * or 0 if code is too long 133*7c478bd9Sstevel@tonic-gate */ 134*7c478bd9Sstevel@tonic-gate 135*7c478bd9Sstevel@tonic-gate int 136*7c478bd9Sstevel@tonic-gate encode(long k, long *py) 137*7c478bd9Sstevel@tonic-gate { 138*7c478bd9Sstevel@tonic-gate register l; 139*7c478bd9Sstevel@tonic-gate long y; 140*7c478bd9Sstevel@tonic-gate if (k < c) { 141*7c478bd9Sstevel@tonic-gate *py = k; 142*7c478bd9Sstevel@tonic-gate return (w-QW); 143*7c478bd9Sstevel@tonic-gate } 144*7c478bd9Sstevel@tonic-gate for (k -= c, y = 1, l = w; k >= n; k -= n, y <<= QW) 145*7c478bd9Sstevel@tonic-gate if ((l += QW) > L) 146*7c478bd9Sstevel@tonic-gate return (0); 147*7c478bd9Sstevel@tonic-gate *py = ((y-1)<<w) + cq + k; 148*7c478bd9Sstevel@tonic-gate return (l); 149*7c478bd9Sstevel@tonic-gate } 150*7c478bd9Sstevel@tonic-gate 151*7c478bd9Sstevel@tonic-gate 152*7c478bd9Sstevel@tonic-gate /* 153*7c478bd9Sstevel@tonic-gate * Initialization code, given expected value of k 154*7c478bd9Sstevel@tonic-gate * E(k) = r/(1-r) = a 155*7c478bd9Sstevel@tonic-gate * and given base width b 156*7c478bd9Sstevel@tonic-gate * return expected length of coded messages 157*7c478bd9Sstevel@tonic-gate */ 158*7c478bd9Sstevel@tonic-gate static struct qlog { 159*7c478bd9Sstevel@tonic-gate long p; 160*7c478bd9Sstevel@tonic-gate double u; 161*7c478bd9Sstevel@tonic-gate } z; 162*7c478bd9Sstevel@tonic-gate 163*7c478bd9Sstevel@tonic-gate static struct qlog 164*7c478bd9Sstevel@tonic-gate qlog(double x, double y, long p, double u) /* find smallest p so x^p<=y */ 165*7c478bd9Sstevel@tonic-gate { 166*7c478bd9Sstevel@tonic-gate 167*7c478bd9Sstevel@tonic-gate if (u/x <= y) { 168*7c478bd9Sstevel@tonic-gate z.p = 0; 169*7c478bd9Sstevel@tonic-gate z.u = 1; 170*7c478bd9Sstevel@tonic-gate } else { 171*7c478bd9Sstevel@tonic-gate z = qlog(x, y, p+p, u*u); 172*7c478bd9Sstevel@tonic-gate if (u*z.u/x > y) { 173*7c478bd9Sstevel@tonic-gate z.p += p; 174*7c478bd9Sstevel@tonic-gate z.u *= u; 175*7c478bd9Sstevel@tonic-gate } 176*7c478bd9Sstevel@tonic-gate } 177*7c478bd9Sstevel@tonic-gate return (z); 178*7c478bd9Sstevel@tonic-gate } 179*7c478bd9Sstevel@tonic-gate 180*7c478bd9Sstevel@tonic-gate double 181*7c478bd9Sstevel@tonic-gate huff(float a) 182*7c478bd9Sstevel@tonic-gate { 183*7c478bd9Sstevel@tonic-gate register i, q; 184*7c478bd9Sstevel@tonic-gate long d, j; 185*7c478bd9Sstevel@tonic-gate double r = a/(1.0 + a); 186*7c478bd9Sstevel@tonic-gate double rc, rq; 187*7c478bd9Sstevel@tonic-gate 188*7c478bd9Sstevel@tonic-gate for (i = 0, q = 1, rq = r; i < QW; i++, q *= 2, rq *= rq) 189*7c478bd9Sstevel@tonic-gate continue; 190*7c478bd9Sstevel@tonic-gate rq /= r; /* rq = r^(q-1) */ 191*7c478bd9Sstevel@tonic-gate (void) qlog(rq, 1./q, 1L, rq); 192*7c478bd9Sstevel@tonic-gate d = z.p; 193*7c478bd9Sstevel@tonic-gate n = d*(q-1); 194*7c478bd9Sstevel@tonic-gate if (n != d * (q - 1)) 195*7c478bd9Sstevel@tonic-gate abort(); /* time to make n long */ 196*7c478bd9Sstevel@tonic-gate for (w = QW, j = 1; j < d; w += QW, j *= q) 197*7c478bd9Sstevel@tonic-gate continue; 198*7c478bd9Sstevel@tonic-gate c = j - d; 199*7c478bd9Sstevel@tonic-gate cq = c*q; 200*7c478bd9Sstevel@tonic-gate cs = cq<<(L-w); 201*7c478bd9Sstevel@tonic-gate qcs = (((long)(q-1)<<w) + cq) << (L-QW-w); 202*7c478bd9Sstevel@tonic-gate v0 = c - cq; 203*7c478bd9Sstevel@tonic-gate for (i = 0, rc = 1; i < c; i++, rc *= r) /* rc = r^c */ 204*7c478bd9Sstevel@tonic-gate continue; 205*7c478bd9Sstevel@tonic-gate return (w + QW*(rc/(1-z.u) - 1)); 206*7c478bd9Sstevel@tonic-gate } 207*7c478bd9Sstevel@tonic-gate 208*7c478bd9Sstevel@tonic-gate void 209*7c478bd9Sstevel@tonic-gate whuff(void) 210*7c478bd9Sstevel@tonic-gate { 211*7c478bd9Sstevel@tonic-gate (void) fwrite((char *) & huffcode, sizeof (huffcode), 1, stdout); 212*7c478bd9Sstevel@tonic-gate } 213*7c478bd9Sstevel@tonic-gate 214*7c478bd9Sstevel@tonic-gate int 215*7c478bd9Sstevel@tonic-gate rhuff(FILE *f) 216*7c478bd9Sstevel@tonic-gate { 217*7c478bd9Sstevel@tonic-gate return (read(fileno(f), (char *)&huffcode, sizeof (huffcode)) == 218*7c478bd9Sstevel@tonic-gate sizeof (huffcode)); 219*7c478bd9Sstevel@tonic-gate } 220