17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 55aefb655Srie * Common Development and Distribution License (the "License"). 65aefb655Srie * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 215aefb655Srie 227c478bd9Sstevel@tonic-gate /* 23*6b3ba5bdSAli Bahrami * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 27a194faf8Srie #include <_string_table.h> 287c478bd9Sstevel@tonic-gate #include <strings.h> 297c478bd9Sstevel@tonic-gate #include <sgs.h> 307c478bd9Sstevel@tonic-gate #include <stdio.h> 317c478bd9Sstevel@tonic-gate 327c478bd9Sstevel@tonic-gate /* 33a194faf8Srie * This file provides the interfaces to build a Str_tbl suitable for use by 34a194faf8Srie * either the sgsmsg message system, or a standard ELF string table (SHT_STRTAB) 35a194faf8Srie * as created by ld(1). 367c478bd9Sstevel@tonic-gate * 37a194faf8Srie * There are two modes which can be used when constructing a string table: 387c478bd9Sstevel@tonic-gate * 397c478bd9Sstevel@tonic-gate * st_new(0) 407c478bd9Sstevel@tonic-gate * standard string table - no compression. This is the 41a194faf8Srie * traditional, fast method. 427c478bd9Sstevel@tonic-gate * 43a194faf8Srie * st_new(FLG_STTAB_COMPRESS) 44a194faf8Srie * builds a compressed string table which both eliminates 45a194faf8Srie * duplicate strings, and permits strings with common suffixes 46a194faf8Srie * (atexit vs. exit) to overlap in the table. This provides space 47a194faf8Srie * savings for many string tables. Although more work than the 48a194faf8Srie * traditional method, the algorithms used are designed to scale 49a194faf8Srie * and keep any overhead at a minimum. 507c478bd9Sstevel@tonic-gate * 51a194faf8Srie * These string tables are built with a common interface in a two-pass manner. 52a194faf8Srie * The first pass finds all of the strings required for the string-table and 53a194faf8Srie * calculates the size required for the final string table. 547c478bd9Sstevel@tonic-gate * 55a194faf8Srie * The second pass allocates the string table, populates the strings into the 56a194faf8Srie * table and returns the offsets the strings have been assigned. 577c478bd9Sstevel@tonic-gate * 587c478bd9Sstevel@tonic-gate * The calling sequence to build and populate a string table is: 597c478bd9Sstevel@tonic-gate * 607c478bd9Sstevel@tonic-gate * st_new(); // initialize strtab 617c478bd9Sstevel@tonic-gate * 627c478bd9Sstevel@tonic-gate * st_insert(st1); // first pass of strings ... 637c478bd9Sstevel@tonic-gate * // calculates size required for 647c478bd9Sstevel@tonic-gate * // string table 657c478bd9Sstevel@tonic-gate * 667c478bd9Sstevel@tonic-gate * st_delstring(st?); // remove string previously 677c478bd9Sstevel@tonic-gate * // inserted 687c478bd9Sstevel@tonic-gate * st_insert(stN); 697c478bd9Sstevel@tonic-gate * 707c478bd9Sstevel@tonic-gate * st_getstrtab_sz(); // freezes strtab and computes 717c478bd9Sstevel@tonic-gate * // size of table. 727c478bd9Sstevel@tonic-gate * 737c478bd9Sstevel@tonic-gate * st_setstrbuf(); // associates a final destination 747c478bd9Sstevel@tonic-gate * // for the string table 757c478bd9Sstevel@tonic-gate * 767c478bd9Sstevel@tonic-gate * st_setstring(st1); // populate the string table 777c478bd9Sstevel@tonic-gate * ... // offsets are based off of second 787c478bd9Sstevel@tonic-gate * // pass through the string table 797c478bd9Sstevel@tonic-gate * st_setstring(stN); 807c478bd9Sstevel@tonic-gate * 817c478bd9Sstevel@tonic-gate * st_destroy(); // tear down string table 827c478bd9Sstevel@tonic-gate * // structures. 837c478bd9Sstevel@tonic-gate * 847c478bd9Sstevel@tonic-gate * String Suffix Compression Algorithm: 857c478bd9Sstevel@tonic-gate * 867c478bd9Sstevel@tonic-gate * Here's a quick high level overview of the Suffix String 877c478bd9Sstevel@tonic-gate * compression algorithm used. First - the heart of the algorithm 887c478bd9Sstevel@tonic-gate * is a Hash table list which represents a dictionary of all unique 897c478bd9Sstevel@tonic-gate * strings inserted into the string table. The hash function for 907c478bd9Sstevel@tonic-gate * this table is a standard string hash except that the hash starts 917c478bd9Sstevel@tonic-gate * at the last character in the string (&str[n - 1]) and works towards 927c478bd9Sstevel@tonic-gate * the first character in the function (&str[0]). As we compute the 937c478bd9Sstevel@tonic-gate * HASH value for a given string, we also compute the hash values 947c478bd9Sstevel@tonic-gate * for all of the possible suffix strings for that string. 957c478bd9Sstevel@tonic-gate * 967c478bd9Sstevel@tonic-gate * As we compute the hash - at each character see if the current 977c478bd9Sstevel@tonic-gate * suffix string for that hash is already present in the table. If 987c478bd9Sstevel@tonic-gate * it is, and the string is a master string. Then change that 997c478bd9Sstevel@tonic-gate * string to a suffix string of the new string being inserted. 1007c478bd9Sstevel@tonic-gate * 1017c478bd9Sstevel@tonic-gate * When the final hash value is found (hash for str[0...n]), check 1027c478bd9Sstevel@tonic-gate * to see if it is in the hash table - if so increment the reference 1037c478bd9Sstevel@tonic-gate * count for the string. If it is not yet in the table, insert a 1047c478bd9Sstevel@tonic-gate * new hash table entry for a master string. 1057c478bd9Sstevel@tonic-gate * 1067c478bd9Sstevel@tonic-gate * The above method will find all suffixes of a given string given 1077c478bd9Sstevel@tonic-gate * that the strings are inserted from shortest to longest. That is 1087c478bd9Sstevel@tonic-gate * why this is a two phase method, we first collect all of the 109a194faf8Srie * strings and store them based off of their length in an AVL tree. 1107c478bd9Sstevel@tonic-gate * Once all of the strings have been submitted we then start the 1117c478bd9Sstevel@tonic-gate * hash table build by traversing the AVL tree in order and 1127c478bd9Sstevel@tonic-gate * inserting the strings from shortest to longest as described 1137c478bd9Sstevel@tonic-gate * above. 1147c478bd9Sstevel@tonic-gate */ 1157c478bd9Sstevel@tonic-gate 1167c478bd9Sstevel@tonic-gate /* LINTLIBRARY */ 1177c478bd9Sstevel@tonic-gate 118a194faf8Srie static int 119a194faf8Srie avl_len_compare(const void *n1, const void *n2) 1207c478bd9Sstevel@tonic-gate { 121cce0e03bSab196087 size_t len1, len2; 1227c478bd9Sstevel@tonic-gate 123a194faf8Srie len1 = ((LenNode *)n1)->ln_strlen; 124a194faf8Srie len2 = ((LenNode *)n2)->ln_strlen; 125a194faf8Srie 126a194faf8Srie if (len1 == len2) 1277c478bd9Sstevel@tonic-gate return (0); 128a194faf8Srie if (len2 < len1) 1297c478bd9Sstevel@tonic-gate return (1); 1307c478bd9Sstevel@tonic-gate return (-1); 1317c478bd9Sstevel@tonic-gate } 1327c478bd9Sstevel@tonic-gate 133a194faf8Srie static int 134a194faf8Srie avl_str_compare(const void *n1, const void *n2) 135a194faf8Srie { 136a194faf8Srie const char *str1, *str2; 137a194faf8Srie int rc; 138a194faf8Srie 139a194faf8Srie str1 = ((StrNode *)n1)->sn_str; 140a194faf8Srie str2 = ((StrNode *)n2)->sn_str; 141a194faf8Srie 142a194faf8Srie rc = strcmp(str1, str2); 143a194faf8Srie if (rc > 0) 144a194faf8Srie return (1); 145a194faf8Srie if (rc < 0) 146a194faf8Srie return (-1); 147a194faf8Srie return (0); 148a194faf8Srie } 149a194faf8Srie 1507c478bd9Sstevel@tonic-gate /* 151a194faf8Srie * Return an initialized Str_tbl - returns NULL on failure. 1527c478bd9Sstevel@tonic-gate * 153a194faf8Srie * flags: 154a194faf8Srie * FLG_STTAB_COMPRESS - build a compressed string table 1557c478bd9Sstevel@tonic-gate */ 1567c478bd9Sstevel@tonic-gate Str_tbl * 157a194faf8Srie st_new(uint_t flags) 1587c478bd9Sstevel@tonic-gate { 1597c478bd9Sstevel@tonic-gate Str_tbl *stp; 1607c478bd9Sstevel@tonic-gate 161*6b3ba5bdSAli Bahrami if ((stp = calloc(sizeof (*stp), 1)) == NULL) 162a194faf8Srie return (NULL); 1637c478bd9Sstevel@tonic-gate 1647c478bd9Sstevel@tonic-gate /* 1657c478bd9Sstevel@tonic-gate * Start with a leading '\0' - it's tradition. 1667c478bd9Sstevel@tonic-gate */ 167a194faf8Srie stp->st_strsize = stp->st_fullstrsize = stp->st_nextoff = 1; 1687c478bd9Sstevel@tonic-gate 1697c478bd9Sstevel@tonic-gate /* 170a194faf8Srie * Do we compress this string table? 1717c478bd9Sstevel@tonic-gate */ 172a194faf8Srie stp->st_flags = flags; 173a194faf8Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 1747c478bd9Sstevel@tonic-gate return (stp); 1757c478bd9Sstevel@tonic-gate 176*6b3ba5bdSAli Bahrami if ((stp->st_lentree = calloc(sizeof (*stp->st_lentree), 1)) == NULL) 177a194faf8Srie return (NULL); 178a194faf8Srie 179a194faf8Srie avl_create(stp->st_lentree, &avl_len_compare, sizeof (LenNode), 180a194faf8Srie SGSOFFSETOF(LenNode, ln_avlnode)); 181a194faf8Srie 182a194faf8Srie return (stp); 183a194faf8Srie } 184a194faf8Srie 185a194faf8Srie /* 186a194faf8Srie * Insert a new string into the Str_tbl. There are two AVL trees used. 187a194faf8Srie * 188*6b3ba5bdSAli Bahrami * - The first LenNode AVL tree maintains a tree of nodes based on string 189a194faf8Srie * sizes. 190*6b3ba5bdSAli Bahrami * - Each LenNode maintains a StrNode AVL tree for each string. Large 191a194faf8Srie * applications have been known to contribute thousands of strings of 192a194faf8Srie * the same size. Should strings need to be removed (-z ignore), then 193a194faf8Srie * the string AVL tree makes this removal efficient and scalable. 194a194faf8Srie */ 195a194faf8Srie int 196a194faf8Srie st_insert(Str_tbl *stp, const char *str) 197a194faf8Srie { 198cce0e03bSab196087 size_t len; 199a194faf8Srie StrNode *snp, sn = { 0 }; 200a194faf8Srie LenNode *lnp, ln = { 0 }; 201a194faf8Srie avl_index_t where; 202a194faf8Srie 203a194faf8Srie /* 204a194faf8Srie * String table can't have been cooked 205a194faf8Srie */ 206a194faf8Srie assert((stp->st_flags & FLG_STTAB_COOKED) == 0); 207a194faf8Srie 208a194faf8Srie /* 209a194faf8Srie * Null strings always point to the head of the string 210a194faf8Srie * table - no reason to keep searching. 211a194faf8Srie */ 212cce0e03bSab196087 if ((len = strlen(str)) == 0) 213a194faf8Srie return (0); 214a194faf8Srie 215a194faf8Srie stp->st_fullstrsize += len + 1; 216a194faf8Srie stp->st_strcnt++; 217a194faf8Srie 218a194faf8Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 219a194faf8Srie return (0); 220a194faf8Srie 221a194faf8Srie /* 222a194faf8Srie * From the controlling string table, determine which LenNode AVL node 223a194faf8Srie * provides for this string length. If the node doesn't exist, insert 224a194faf8Srie * a new node to represent this string length. 225a194faf8Srie */ 226a194faf8Srie ln.ln_strlen = len; 227a194faf8Srie if ((lnp = avl_find(stp->st_lentree, &ln, &where)) == NULL) { 228*6b3ba5bdSAli Bahrami if ((lnp = calloc(sizeof (*lnp), 1)) == NULL) 229a194faf8Srie return (-1); 230a194faf8Srie lnp->ln_strlen = len; 231a194faf8Srie avl_insert(stp->st_lentree, lnp, where); 232a194faf8Srie 233*6b3ba5bdSAli Bahrami if ((lnp->ln_strtree = calloc(sizeof (*lnp->ln_strtree), 1)) == 234*6b3ba5bdSAli Bahrami NULL) 235a194faf8Srie return (0); 236a194faf8Srie 237a194faf8Srie avl_create(lnp->ln_strtree, &avl_str_compare, sizeof (StrNode), 238a194faf8Srie SGSOFFSETOF(StrNode, sn_avlnode)); 239a194faf8Srie } 240a194faf8Srie 241a194faf8Srie /* 242a194faf8Srie * From the string length AVL node determine whether a StrNode AVL node 243a194faf8Srie * provides this string. If the node doesn't exist, insert a new node 244a194faf8Srie * to represent this string. 245a194faf8Srie */ 246a194faf8Srie sn.sn_str = str; 247a194faf8Srie if ((snp = avl_find(lnp->ln_strtree, &sn, &where)) == NULL) { 248*6b3ba5bdSAli Bahrami if ((snp = calloc(sizeof (*snp), 1)) == NULL) 249a194faf8Srie return (-1); 250a194faf8Srie snp->sn_str = str; 251a194faf8Srie avl_insert(lnp->ln_strtree, snp, where); 252a194faf8Srie } 253a194faf8Srie snp->sn_refcnt++; 254a194faf8Srie 2557c478bd9Sstevel@tonic-gate return (0); 2567c478bd9Sstevel@tonic-gate } 2577c478bd9Sstevel@tonic-gate 258a194faf8Srie /* 259a194faf8Srie * Remove a previously inserted string from the Str_tbl. 260a194faf8Srie */ 261a194faf8Srie int 262a194faf8Srie st_delstring(Str_tbl *stp, const char *str) 263a194faf8Srie { 264cce0e03bSab196087 size_t len; 265a194faf8Srie LenNode *lnp, ln = { 0 }; 266a194faf8Srie StrNode *snp, sn = { 0 }; 2677c478bd9Sstevel@tonic-gate 268a194faf8Srie /* 269a194faf8Srie * String table can't have been cooked 270a194faf8Srie */ 271a194faf8Srie assert((stp->st_flags & FLG_STTAB_COOKED) == 0); 272a194faf8Srie 273cce0e03bSab196087 len = strlen(str); 274a194faf8Srie stp->st_fullstrsize -= len + 1; 275a194faf8Srie 276a194faf8Srie if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) 277a194faf8Srie return (0); 278a194faf8Srie 279a194faf8Srie /* 280a194faf8Srie * Determine which LenNode AVL node provides for this string length. 281a194faf8Srie */ 282a194faf8Srie ln.ln_strlen = len; 283a194faf8Srie if ((lnp = avl_find(stp->st_lentree, &ln, 0)) != NULL) { 284a194faf8Srie sn.sn_str = str; 285a194faf8Srie if ((snp = avl_find(lnp->ln_strtree, &sn, 0)) != NULL) { 286a194faf8Srie /* 287a194faf8Srie * Reduce the reference count, and if zero remove the 288a194faf8Srie * node. 289a194faf8Srie */ 290a194faf8Srie if (--snp->sn_refcnt == 0) 291a194faf8Srie avl_remove(lnp->ln_strtree, snp); 292a194faf8Srie return (0); 293a194faf8Srie } 294a194faf8Srie } 295a194faf8Srie 296a194faf8Srie /* 297a194faf8Srie * No strings of this length, or no string itself - someone goofed. 298a194faf8Srie */ 299a194faf8Srie return (-1); 3007c478bd9Sstevel@tonic-gate } 3017c478bd9Sstevel@tonic-gate 3027c478bd9Sstevel@tonic-gate /* 3037c478bd9Sstevel@tonic-gate * Tear down a String_Table structure. 3047c478bd9Sstevel@tonic-gate */ 3057c478bd9Sstevel@tonic-gate void 3067c478bd9Sstevel@tonic-gate st_destroy(Str_tbl *stp) 3077c478bd9Sstevel@tonic-gate { 3087c478bd9Sstevel@tonic-gate Str_hash *sthash, *psthash; 3097c478bd9Sstevel@tonic-gate Str_master *mstr, *pmstr; 3107c478bd9Sstevel@tonic-gate uint_t i; 3117c478bd9Sstevel@tonic-gate 3127c478bd9Sstevel@tonic-gate /* 3137c478bd9Sstevel@tonic-gate * cleanup the master strings 3147c478bd9Sstevel@tonic-gate */ 3157c478bd9Sstevel@tonic-gate for (mstr = stp->st_mstrlist, pmstr = 0; mstr; 3167c478bd9Sstevel@tonic-gate mstr = mstr->sm_next) { 3177c478bd9Sstevel@tonic-gate if (pmstr) 3187c478bd9Sstevel@tonic-gate free(pmstr); 3197c478bd9Sstevel@tonic-gate pmstr = mstr; 3207c478bd9Sstevel@tonic-gate } 3217c478bd9Sstevel@tonic-gate if (pmstr) 3227c478bd9Sstevel@tonic-gate free(pmstr); 3237c478bd9Sstevel@tonic-gate 3247c478bd9Sstevel@tonic-gate if (stp->st_hashbcks) { 3257c478bd9Sstevel@tonic-gate for (i = 0; i < stp->st_hbckcnt; i++) { 3267c478bd9Sstevel@tonic-gate for (sthash = stp->st_hashbcks[i], psthash = 0; 3277c478bd9Sstevel@tonic-gate sthash; sthash = sthash->hi_next) { 3287c478bd9Sstevel@tonic-gate if (psthash) 3297c478bd9Sstevel@tonic-gate free(psthash); 3307c478bd9Sstevel@tonic-gate psthash = sthash; 3317c478bd9Sstevel@tonic-gate } 3327c478bd9Sstevel@tonic-gate if (psthash) 3337c478bd9Sstevel@tonic-gate free(psthash); 3347c478bd9Sstevel@tonic-gate } 3357c478bd9Sstevel@tonic-gate free(stp->st_hashbcks); 3367c478bd9Sstevel@tonic-gate } 3377c478bd9Sstevel@tonic-gate free(stp); 3387c478bd9Sstevel@tonic-gate } 3397c478bd9Sstevel@tonic-gate 3407c478bd9Sstevel@tonic-gate 3417c478bd9Sstevel@tonic-gate /* 3427c478bd9Sstevel@tonic-gate * For a given string - copy it into the buffer associated with 3437c478bd9Sstevel@tonic-gate * the string table - and return the offset it has been assigned. 3447c478bd9Sstevel@tonic-gate * 3457c478bd9Sstevel@tonic-gate * If a value of '-1' is returned - the string was not found in 3467c478bd9Sstevel@tonic-gate * the Str_tbl. 3477c478bd9Sstevel@tonic-gate */ 3487c478bd9Sstevel@tonic-gate int 349cce0e03bSab196087 st_setstring(Str_tbl *stp, const char *str, size_t *stoff) 3507c478bd9Sstevel@tonic-gate { 351cce0e03bSab196087 size_t stlen; 3527c478bd9Sstevel@tonic-gate uint_t hashval; 3537c478bd9Sstevel@tonic-gate Str_hash *sthash; 3547c478bd9Sstevel@tonic-gate Str_master *mstr; 3557c478bd9Sstevel@tonic-gate int i; 3567c478bd9Sstevel@tonic-gate 3577c478bd9Sstevel@tonic-gate /* 3587c478bd9Sstevel@tonic-gate * String table *must* have been previously cooked 3597c478bd9Sstevel@tonic-gate */ 3607c478bd9Sstevel@tonic-gate assert(stp->st_strbuf); 3617c478bd9Sstevel@tonic-gate 3627c478bd9Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED); 363cce0e03bSab196087 stlen = strlen(str); 3647c478bd9Sstevel@tonic-gate /* 3657c478bd9Sstevel@tonic-gate * Null string always points to head of string table 3667c478bd9Sstevel@tonic-gate */ 3677c478bd9Sstevel@tonic-gate if (stlen == 0) { 3687c478bd9Sstevel@tonic-gate *stoff = 0; 3697c478bd9Sstevel@tonic-gate return (0); 3707c478bd9Sstevel@tonic-gate } 3717c478bd9Sstevel@tonic-gate 3727c478bd9Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 373cce0e03bSab196087 size_t _stoff; 3747c478bd9Sstevel@tonic-gate 3757c478bd9Sstevel@tonic-gate stlen++; /* count for trailing '\0' */ 3767c478bd9Sstevel@tonic-gate _stoff = stp->st_nextoff; 3777c478bd9Sstevel@tonic-gate /* 3787c478bd9Sstevel@tonic-gate * Have we overflowed our assigned buffer? 3797c478bd9Sstevel@tonic-gate */ 380a194faf8Srie if ((_stoff + stlen) > stp->st_fullstrsize) 3817c478bd9Sstevel@tonic-gate return (-1); 3827c478bd9Sstevel@tonic-gate memcpy(stp->st_strbuf + _stoff, str, stlen); 3837c478bd9Sstevel@tonic-gate *stoff = _stoff; 3847c478bd9Sstevel@tonic-gate stp->st_nextoff += stlen; 3857c478bd9Sstevel@tonic-gate return (0); 3867c478bd9Sstevel@tonic-gate } 3877c478bd9Sstevel@tonic-gate 3887c478bd9Sstevel@tonic-gate /* 389a194faf8Srie * Calculate reverse hash for string. 3907c478bd9Sstevel@tonic-gate */ 3917c478bd9Sstevel@tonic-gate hashval = HASHSEED; 3927c478bd9Sstevel@tonic-gate for (i = stlen; i >= 0; i--) { 3937c478bd9Sstevel@tonic-gate hashval = ((hashval << 5) + hashval) + 3947c478bd9Sstevel@tonic-gate str[i]; /* h = ((h * 33) + c) */ 3957c478bd9Sstevel@tonic-gate } 3967c478bd9Sstevel@tonic-gate 3977c478bd9Sstevel@tonic-gate for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash; 3987c478bd9Sstevel@tonic-gate sthash = sthash->hi_next) { 3997c478bd9Sstevel@tonic-gate const char *hstr; 4007c478bd9Sstevel@tonic-gate 401a194faf8Srie if (sthash->hi_hashval != hashval) 402a194faf8Srie continue; 403a194faf8Srie 404a194faf8Srie hstr = &sthash->hi_mstr->sm_str[sthash->hi_mstr->sm_strlen - 405a194faf8Srie sthash->hi_strlen]; 406a194faf8Srie if (strcmp(str, hstr) == 0) 4077c478bd9Sstevel@tonic-gate break; 4087c478bd9Sstevel@tonic-gate } 4097c478bd9Sstevel@tonic-gate 4107c478bd9Sstevel@tonic-gate /* 4117c478bd9Sstevel@tonic-gate * Did we find the string? 4127c478bd9Sstevel@tonic-gate */ 4137c478bd9Sstevel@tonic-gate if (sthash == 0) 4147c478bd9Sstevel@tonic-gate return (-1); 4157c478bd9Sstevel@tonic-gate 4167c478bd9Sstevel@tonic-gate /* 4177c478bd9Sstevel@tonic-gate * Has this string been copied into the string table? 4187c478bd9Sstevel@tonic-gate */ 4197c478bd9Sstevel@tonic-gate mstr = sthash->hi_mstr; 420a194faf8Srie if (mstr->sm_stroff == 0) { 421cce0e03bSab196087 size_t mstrlen = mstr->sm_strlen + 1; 422a194faf8Srie 423a194faf8Srie mstr->sm_stroff = stp->st_nextoff; 424a194faf8Srie 4257c478bd9Sstevel@tonic-gate /* 4267c478bd9Sstevel@tonic-gate * Have we overflowed our assigned buffer? 4277c478bd9Sstevel@tonic-gate */ 428a194faf8Srie if ((mstr->sm_stroff + mstrlen) > stp->st_fullstrsize) 4297c478bd9Sstevel@tonic-gate return (-1); 430a194faf8Srie 431a194faf8Srie (void) memcpy(stp->st_strbuf + mstr->sm_stroff, 432a194faf8Srie mstr->sm_str, mstrlen); 433a194faf8Srie stp->st_nextoff += mstrlen; 4347c478bd9Sstevel@tonic-gate } 435a194faf8Srie 4367c478bd9Sstevel@tonic-gate /* 437a194faf8Srie * Calculate offset of (sub)string. 4387c478bd9Sstevel@tonic-gate */ 439a194faf8Srie *stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen; 4407c478bd9Sstevel@tonic-gate 4417c478bd9Sstevel@tonic-gate return (0); 4427c478bd9Sstevel@tonic-gate } 4437c478bd9Sstevel@tonic-gate 4447c478bd9Sstevel@tonic-gate 4457c478bd9Sstevel@tonic-gate static int 446cce0e03bSab196087 st_hash_insert(Str_tbl *stp, const char *str, size_t len) 4477c478bd9Sstevel@tonic-gate { 4487c478bd9Sstevel@tonic-gate int i; 4497c478bd9Sstevel@tonic-gate uint_t hashval = HASHSEED; 4507c478bd9Sstevel@tonic-gate uint_t bckcnt = stp->st_hbckcnt; 4517c478bd9Sstevel@tonic-gate Str_hash **hashbcks = stp->st_hashbcks; 4527c478bd9Sstevel@tonic-gate Str_hash *sthash; 4537c478bd9Sstevel@tonic-gate Str_master *mstr = 0; 4547c478bd9Sstevel@tonic-gate 4557c478bd9Sstevel@tonic-gate /* 4567c478bd9Sstevel@tonic-gate * We use a classic 'Bernstein k=33' hash function. But 4577c478bd9Sstevel@tonic-gate * instead of hashing from the start of the string to the 4587c478bd9Sstevel@tonic-gate * end, we do it in reverse. 4597c478bd9Sstevel@tonic-gate * 4607c478bd9Sstevel@tonic-gate * This way - we are essentially building all of the 4617c478bd9Sstevel@tonic-gate * suffix hashvalues as we go. We can check to see if 4627c478bd9Sstevel@tonic-gate * any suffixes already exist in the tree as we generate 4637c478bd9Sstevel@tonic-gate * the hash. 4647c478bd9Sstevel@tonic-gate */ 465a194faf8Srie for (i = len; i >= 0; i--) { 4667c478bd9Sstevel@tonic-gate hashval = ((hashval << 5) + hashval) + 4677c478bd9Sstevel@tonic-gate str[i]; /* h = ((h * 33) + c) */ 468a194faf8Srie 4697c478bd9Sstevel@tonic-gate for (sthash = hashbcks[hashval % bckcnt]; 4707c478bd9Sstevel@tonic-gate sthash; sthash = sthash->hi_next) { 4717c478bd9Sstevel@tonic-gate const char *hstr; 4727c478bd9Sstevel@tonic-gate Str_master *_mstr; 4737c478bd9Sstevel@tonic-gate 474a194faf8Srie if (sthash->hi_hashval != hashval) 475a194faf8Srie continue; 476a194faf8Srie 4777c478bd9Sstevel@tonic-gate _mstr = sthash->hi_mstr; 478a194faf8Srie hstr = &_mstr->sm_str[_mstr->sm_strlen - 479a194faf8Srie sthash->hi_strlen]; 480a194faf8Srie 481a194faf8Srie if (strcmp(&str[i], hstr)) 482a194faf8Srie continue; 483a194faf8Srie 4847c478bd9Sstevel@tonic-gate if (i == 0) { 4857c478bd9Sstevel@tonic-gate /* 486a194faf8Srie * Entry already in table, increment refcnt and 487a194faf8Srie * get out. 4887c478bd9Sstevel@tonic-gate */ 4897c478bd9Sstevel@tonic-gate sthash->hi_refcnt++; 4907c478bd9Sstevel@tonic-gate return (0); 4917c478bd9Sstevel@tonic-gate } else { 4927c478bd9Sstevel@tonic-gate /* 493a194faf8Srie * If this 'suffix' is presently a 'master 494a194faf8Srie * string, then take over it's record. 4957c478bd9Sstevel@tonic-gate */ 496a194faf8Srie if (sthash->hi_strlen == _mstr->sm_strlen) { 4977c478bd9Sstevel@tonic-gate /* 498a194faf8Srie * we should only do this once. 4997c478bd9Sstevel@tonic-gate */ 5007c478bd9Sstevel@tonic-gate assert(mstr == 0); 5017c478bd9Sstevel@tonic-gate mstr = _mstr; 5027c478bd9Sstevel@tonic-gate } 5037c478bd9Sstevel@tonic-gate } 5047c478bd9Sstevel@tonic-gate } 5057c478bd9Sstevel@tonic-gate } 5067c478bd9Sstevel@tonic-gate 5077c478bd9Sstevel@tonic-gate /* 5087c478bd9Sstevel@tonic-gate * Do we need a new master string, or can we take over 5097c478bd9Sstevel@tonic-gate * one we already found in the table? 5107c478bd9Sstevel@tonic-gate */ 5117c478bd9Sstevel@tonic-gate if (mstr == 0) { 5127c478bd9Sstevel@tonic-gate /* 5137c478bd9Sstevel@tonic-gate * allocate a new master string 5147c478bd9Sstevel@tonic-gate */ 515*6b3ba5bdSAli Bahrami if ((mstr = calloc(sizeof (*mstr), 1)) == 0) 5167c478bd9Sstevel@tonic-gate return (-1); 5177c478bd9Sstevel@tonic-gate mstr->sm_next = stp->st_mstrlist; 5187c478bd9Sstevel@tonic-gate stp->st_mstrlist = mstr; 519a194faf8Srie stp->st_strsize += len + 1; 5207c478bd9Sstevel@tonic-gate } else { 5217c478bd9Sstevel@tonic-gate /* 522a194faf8Srie * We are taking over a existing master string, the string size 523a194faf8Srie * only increments by the difference between the current string 524a194faf8Srie * and the previous master. 5257c478bd9Sstevel@tonic-gate */ 526a194faf8Srie assert(len > mstr->sm_strlen); 527a194faf8Srie stp->st_strsize += len - mstr->sm_strlen; 5287c478bd9Sstevel@tonic-gate } 5297c478bd9Sstevel@tonic-gate 530*6b3ba5bdSAli Bahrami if ((sthash = calloc(sizeof (*sthash), 1)) == 0) 5317c478bd9Sstevel@tonic-gate return (-1); 5327c478bd9Sstevel@tonic-gate 5337c478bd9Sstevel@tonic-gate mstr->sm_hashval = sthash->hi_hashval = hashval; 534a194faf8Srie mstr->sm_strlen = sthash->hi_strlen = len; 5357c478bd9Sstevel@tonic-gate mstr->sm_str = str; 5367c478bd9Sstevel@tonic-gate sthash->hi_refcnt = 1; 5377c478bd9Sstevel@tonic-gate sthash->hi_mstr = mstr; 5387c478bd9Sstevel@tonic-gate 5397c478bd9Sstevel@tonic-gate /* 5407c478bd9Sstevel@tonic-gate * Insert string element into head of hash list 5417c478bd9Sstevel@tonic-gate */ 5427c478bd9Sstevel@tonic-gate hashval = hashval % bckcnt; 5437c478bd9Sstevel@tonic-gate sthash->hi_next = hashbcks[hashval]; 5447c478bd9Sstevel@tonic-gate hashbcks[hashval] = sthash; 5457c478bd9Sstevel@tonic-gate return (0); 5467c478bd9Sstevel@tonic-gate } 5477c478bd9Sstevel@tonic-gate 5487c478bd9Sstevel@tonic-gate /* 5497c478bd9Sstevel@tonic-gate * Return amount of space required for the string table. 5507c478bd9Sstevel@tonic-gate */ 551cce0e03bSab196087 size_t 5527c478bd9Sstevel@tonic-gate st_getstrtab_sz(Str_tbl *stp) 5537c478bd9Sstevel@tonic-gate { 554a194faf8Srie assert(stp->st_fullstrsize > 0); 5557c478bd9Sstevel@tonic-gate 5567c478bd9Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 5577c478bd9Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED; 558a194faf8Srie return (stp->st_fullstrsize); 5597c478bd9Sstevel@tonic-gate } 5607c478bd9Sstevel@tonic-gate 5617c478bd9Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COOKED) == 0) { 562a194faf8Srie LenNode *lnp; 5637c478bd9Sstevel@tonic-gate void *cookie; 5647c478bd9Sstevel@tonic-gate 5657c478bd9Sstevel@tonic-gate stp->st_flags |= FLG_STTAB_COOKED; 5667c478bd9Sstevel@tonic-gate /* 5677c478bd9Sstevel@tonic-gate * allocate a hash table about the size of # of 5687c478bd9Sstevel@tonic-gate * strings input. 5697c478bd9Sstevel@tonic-gate */ 570a194faf8Srie stp->st_hbckcnt = findprime(stp->st_strcnt); 571*6b3ba5bdSAli Bahrami if ((stp->st_hashbcks = calloc(sizeof (*stp->st_hashbcks), 572*6b3ba5bdSAli Bahrami stp->st_hbckcnt)) == NULL) 5737c478bd9Sstevel@tonic-gate return (0); 5747c478bd9Sstevel@tonic-gate 5757c478bd9Sstevel@tonic-gate /* 576a194faf8Srie * We now walk all of the strings in the list, from shortest to 577a194faf8Srie * longest, and insert them into the hashtable. 5787c478bd9Sstevel@tonic-gate */ 579a194faf8Srie if ((lnp = avl_first(stp->st_lentree)) == NULL) { 5807c478bd9Sstevel@tonic-gate /* 581a194faf8Srie * Is it possible we have an empty string table, if so, 582a194faf8Srie * the table still contains '\0', so return the size. 5837c478bd9Sstevel@tonic-gate */ 584a194faf8Srie if (avl_numnodes(stp->st_lentree) == 0) { 585a194faf8Srie assert(stp->st_strsize == 1); 586a194faf8Srie return (stp->st_strsize); 5877c478bd9Sstevel@tonic-gate } 5887c478bd9Sstevel@tonic-gate return (0); 5897c478bd9Sstevel@tonic-gate } 590a194faf8Srie 591a194faf8Srie while (lnp) { 592a194faf8Srie StrNode *snp; 5937c478bd9Sstevel@tonic-gate 5947c478bd9Sstevel@tonic-gate /* 595a194faf8Srie * Walk the string lists and insert them into the hash 596a194faf8Srie * list. Once a string is inserted we no longer need 597a194faf8Srie * it's entry, so the string can be freed. 5987c478bd9Sstevel@tonic-gate */ 599a194faf8Srie for (snp = avl_first(lnp->ln_strtree); snp; 600a194faf8Srie snp = AVL_NEXT(lnp->ln_strtree, snp)) { 601a194faf8Srie if (st_hash_insert(stp, snp->sn_str, 602a194faf8Srie lnp->ln_strlen) == -1) 6037c478bd9Sstevel@tonic-gate return (0); 6047c478bd9Sstevel@tonic-gate } 6057c478bd9Sstevel@tonic-gate 6067c478bd9Sstevel@tonic-gate /* 607a194faf8Srie * Now that the strings have been copied, walk the 608a194faf8Srie * StrNode tree and free all the AVL nodes. Note, 609a194faf8Srie * avl_destroy_nodes() beats avl_remove() as the 610a194faf8Srie * latter balances the nodes as they are removed. 611a194faf8Srie * We just want to tear the whole thing down fast. 6127c478bd9Sstevel@tonic-gate */ 6137c478bd9Sstevel@tonic-gate cookie = NULL; 614a194faf8Srie while ((snp = avl_destroy_nodes(lnp->ln_strtree, 6157c478bd9Sstevel@tonic-gate &cookie)) != NULL) 616a194faf8Srie free(snp); 617a194faf8Srie avl_destroy(lnp->ln_strtree); 618a194faf8Srie free(lnp->ln_strtree); 619a194faf8Srie lnp->ln_strtree = NULL; 6207c478bd9Sstevel@tonic-gate 621a194faf8Srie /* 622a194faf8Srie * Move on to the next LenNode. 623a194faf8Srie */ 624a194faf8Srie lnp = AVL_NEXT(stp->st_lentree, lnp); 6257c478bd9Sstevel@tonic-gate } 6267c478bd9Sstevel@tonic-gate 6277c478bd9Sstevel@tonic-gate /* 628a194faf8Srie * Now that all of the strings have been freed, walk the 629a194faf8Srie * LenNode tree and free all of the AVL nodes. Note, 630a194faf8Srie * avl_destroy_nodes() beats avl_remove() as the latter 631a194faf8Srie * balances the nodes as they are removed. We just want to 632a194faf8Srie * tear the whole thing down fast. 633a194faf8Srie */ 634a194faf8Srie cookie = NULL; 635a194faf8Srie while ((lnp = avl_destroy_nodes(stp->st_lentree, 636a194faf8Srie &cookie)) != NULL) 637a194faf8Srie free(lnp); 638a194faf8Srie avl_destroy(stp->st_lentree); 639a194faf8Srie free(stp->st_lentree); 640a194faf8Srie stp->st_lentree = 0; 641a194faf8Srie } 642a194faf8Srie 643a194faf8Srie assert(stp->st_strsize > 0); 644a194faf8Srie assert(stp->st_fullstrsize >= stp->st_strsize); 645a194faf8Srie 646a194faf8Srie return (stp->st_strsize); 647a194faf8Srie } 648a194faf8Srie 649a194faf8Srie /* 650a194faf8Srie * Associate a buffer with a string table. 6517c478bd9Sstevel@tonic-gate */ 6527c478bd9Sstevel@tonic-gate const char * 6537c478bd9Sstevel@tonic-gate st_getstrbuf(Str_tbl *stp) 6547c478bd9Sstevel@tonic-gate { 6557c478bd9Sstevel@tonic-gate return (stp->st_strbuf); 6567c478bd9Sstevel@tonic-gate } 6577c478bd9Sstevel@tonic-gate 6587c478bd9Sstevel@tonic-gate int 659cce0e03bSab196087 st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize) 6607c478bd9Sstevel@tonic-gate { 6617c478bd9Sstevel@tonic-gate assert(stp->st_flags & FLG_STTAB_COOKED); 6627c478bd9Sstevel@tonic-gate 6637c478bd9Sstevel@tonic-gate if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) { 664a194faf8Srie if (bufsize < stp->st_fullstrsize) 6657c478bd9Sstevel@tonic-gate return (-1); 6667c478bd9Sstevel@tonic-gate } else { 667a194faf8Srie if (bufsize < stp->st_strsize) 6687c478bd9Sstevel@tonic-gate return (-1); 6697c478bd9Sstevel@tonic-gate } 6707c478bd9Sstevel@tonic-gate 6717c478bd9Sstevel@tonic-gate stp->st_strbuf = stbuf; 6727c478bd9Sstevel@tonic-gate #ifdef DEBUG 6737c478bd9Sstevel@tonic-gate /* 6747c478bd9Sstevel@tonic-gate * for debug builds - start with a stringtable filled in 675*6b3ba5bdSAli Bahrami * with '0xff'. This makes it very easy to spot unfilled 676*6b3ba5bdSAli Bahrami * holes in the strtab. 6777c478bd9Sstevel@tonic-gate */ 6787c478bd9Sstevel@tonic-gate memset(stbuf, 0xff, bufsize); 6797c478bd9Sstevel@tonic-gate stbuf[0] = '\0'; 6807c478bd9Sstevel@tonic-gate #else 6817c478bd9Sstevel@tonic-gate memset(stbuf, 0x0, bufsize); 6827c478bd9Sstevel@tonic-gate #endif 6837c478bd9Sstevel@tonic-gate return (0); 6847c478bd9Sstevel@tonic-gate } 685