xref: /titanic_51/usr/src/cmd/sgs/tools/common/string_table.c (revision 6b3ba5bde920961dbf915c89b7736f96ff487d20)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
55aefb655Srie  * Common Development and Distribution License (the "License").
65aefb655Srie  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
215aefb655Srie 
227c478bd9Sstevel@tonic-gate /*
23*6b3ba5bdSAli Bahrami  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
27a194faf8Srie #include <_string_table.h>
287c478bd9Sstevel@tonic-gate #include <strings.h>
297c478bd9Sstevel@tonic-gate #include <sgs.h>
307c478bd9Sstevel@tonic-gate #include <stdio.h>
317c478bd9Sstevel@tonic-gate 
327c478bd9Sstevel@tonic-gate /*
33a194faf8Srie  * This file provides the interfaces to build a Str_tbl suitable for use by
34a194faf8Srie  * either the sgsmsg message system, or a standard ELF string table (SHT_STRTAB)
35a194faf8Srie  * as created by ld(1).
367c478bd9Sstevel@tonic-gate  *
37a194faf8Srie  * There are two modes which can be used when constructing a string table:
387c478bd9Sstevel@tonic-gate  *
397c478bd9Sstevel@tonic-gate  *	st_new(0)
407c478bd9Sstevel@tonic-gate  *		standard string table - no compression.  This is the
41a194faf8Srie  *		traditional, fast method.
427c478bd9Sstevel@tonic-gate  *
43a194faf8Srie  *	st_new(FLG_STTAB_COMPRESS)
44a194faf8Srie  *		builds a compressed string table which both eliminates
45a194faf8Srie  *		duplicate strings, and permits strings with common suffixes
46a194faf8Srie  *		(atexit vs. exit) to overlap in the table.  This provides space
47a194faf8Srie  *		savings for many string tables.  Although more work than the
48a194faf8Srie  *		traditional method, the algorithms used are designed to scale
49a194faf8Srie  *		and keep any overhead at a minimum.
507c478bd9Sstevel@tonic-gate  *
51a194faf8Srie  * These string tables are built with a common interface in a two-pass manner.
52a194faf8Srie  * The first pass finds all of the strings required for the string-table and
53a194faf8Srie  * calculates the size required for the final string table.
547c478bd9Sstevel@tonic-gate  *
55a194faf8Srie  * The second pass allocates the string table, populates the strings into the
56a194faf8Srie  * table and returns the offsets the strings have been assigned.
577c478bd9Sstevel@tonic-gate  *
587c478bd9Sstevel@tonic-gate  * The calling sequence to build and populate a string table is:
597c478bd9Sstevel@tonic-gate  *
607c478bd9Sstevel@tonic-gate  *		st_new();		// initialize strtab
617c478bd9Sstevel@tonic-gate  *
627c478bd9Sstevel@tonic-gate  *		st_insert(st1);		// first pass of strings ...
637c478bd9Sstevel@tonic-gate  *					// calculates size required for
647c478bd9Sstevel@tonic-gate  *					// string table
657c478bd9Sstevel@tonic-gate  *
667c478bd9Sstevel@tonic-gate  *		st_delstring(st?);	// remove string previously
677c478bd9Sstevel@tonic-gate  *					// inserted
687c478bd9Sstevel@tonic-gate  *		st_insert(stN);
697c478bd9Sstevel@tonic-gate  *
707c478bd9Sstevel@tonic-gate  *		st_getstrtab_sz();	// freezes strtab and computes
717c478bd9Sstevel@tonic-gate  *					// size of table.
727c478bd9Sstevel@tonic-gate  *
737c478bd9Sstevel@tonic-gate  *		st_setstrbuf();		// associates a final destination
747c478bd9Sstevel@tonic-gate  *					// for the string table
757c478bd9Sstevel@tonic-gate  *
767c478bd9Sstevel@tonic-gate  *		st_setstring(st1);	// populate the string table
777c478bd9Sstevel@tonic-gate  *		...			// offsets are based off of second
787c478bd9Sstevel@tonic-gate  *					// pass	through the string table
797c478bd9Sstevel@tonic-gate  *		st_setstring(stN);
807c478bd9Sstevel@tonic-gate  *
817c478bd9Sstevel@tonic-gate  *		st_destroy();		// tear down string table
827c478bd9Sstevel@tonic-gate  *					// structures.
837c478bd9Sstevel@tonic-gate  *
847c478bd9Sstevel@tonic-gate  * String Suffix Compression Algorithm:
857c478bd9Sstevel@tonic-gate  *
867c478bd9Sstevel@tonic-gate  *   Here's a quick high level overview of the Suffix String
877c478bd9Sstevel@tonic-gate  *   compression algorithm used.  First - the heart of the algorithm
887c478bd9Sstevel@tonic-gate  *   is a Hash table list which represents a dictionary of all unique
897c478bd9Sstevel@tonic-gate  *   strings inserted into the string table.  The hash function for
907c478bd9Sstevel@tonic-gate  *   this table is a standard string hash except that the hash starts
917c478bd9Sstevel@tonic-gate  *   at the last character in the string (&str[n - 1]) and works towards
927c478bd9Sstevel@tonic-gate  *   the first character in the function (&str[0]).  As we compute the
937c478bd9Sstevel@tonic-gate  *   HASH value for a given string, we also compute the hash values
947c478bd9Sstevel@tonic-gate  *   for all of the possible suffix strings for that string.
957c478bd9Sstevel@tonic-gate  *
967c478bd9Sstevel@tonic-gate  *   As we compute the hash - at each character see if the current
977c478bd9Sstevel@tonic-gate  *   suffix string for that hash is already present in the table.  If
987c478bd9Sstevel@tonic-gate  *   it is, and the string is a master string.  Then change that
997c478bd9Sstevel@tonic-gate  *   string to a suffix string of the new string being inserted.
1007c478bd9Sstevel@tonic-gate  *
1017c478bd9Sstevel@tonic-gate  *   When the final hash value is found (hash for str[0...n]), check
1027c478bd9Sstevel@tonic-gate  *   to see if it is in the hash table - if so increment the reference
1037c478bd9Sstevel@tonic-gate  *   count for the string.  If it is not yet in the table, insert a
1047c478bd9Sstevel@tonic-gate  *   new hash table entry for a master string.
1057c478bd9Sstevel@tonic-gate  *
1067c478bd9Sstevel@tonic-gate  *   The above method will find all suffixes of a given string given
1077c478bd9Sstevel@tonic-gate  *   that the strings are inserted from shortest to longest.  That is
1087c478bd9Sstevel@tonic-gate  *   why this is a two phase method, we first collect all of the
109a194faf8Srie  *   strings and store them based off of their length in an AVL tree.
1107c478bd9Sstevel@tonic-gate  *   Once all of the strings have been submitted we then start the
1117c478bd9Sstevel@tonic-gate  *   hash table build by traversing the AVL tree in order and
1127c478bd9Sstevel@tonic-gate  *   inserting the strings from shortest to longest as described
1137c478bd9Sstevel@tonic-gate  *   above.
1147c478bd9Sstevel@tonic-gate  */
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate /* LINTLIBRARY */
1177c478bd9Sstevel@tonic-gate 
118a194faf8Srie static int
119a194faf8Srie avl_len_compare(const void *n1, const void *n2)
1207c478bd9Sstevel@tonic-gate {
121cce0e03bSab196087 	size_t	len1, len2;
1227c478bd9Sstevel@tonic-gate 
123a194faf8Srie 	len1 = ((LenNode *)n1)->ln_strlen;
124a194faf8Srie 	len2 = ((LenNode *)n2)->ln_strlen;
125a194faf8Srie 
126a194faf8Srie 	if (len1 == len2)
1277c478bd9Sstevel@tonic-gate 		return (0);
128a194faf8Srie 	if (len2 < len1)
1297c478bd9Sstevel@tonic-gate 		return (1);
1307c478bd9Sstevel@tonic-gate 	return (-1);
1317c478bd9Sstevel@tonic-gate }
1327c478bd9Sstevel@tonic-gate 
133a194faf8Srie static int
134a194faf8Srie avl_str_compare(const void *n1, const void *n2)
135a194faf8Srie {
136a194faf8Srie 	const char	*str1, *str2;
137a194faf8Srie 	int		rc;
138a194faf8Srie 
139a194faf8Srie 	str1 = ((StrNode *)n1)->sn_str;
140a194faf8Srie 	str2 = ((StrNode *)n2)->sn_str;
141a194faf8Srie 
142a194faf8Srie 	rc = strcmp(str1, str2);
143a194faf8Srie 	if (rc > 0)
144a194faf8Srie 		return (1);
145a194faf8Srie 	if (rc < 0)
146a194faf8Srie 		return (-1);
147a194faf8Srie 	return (0);
148a194faf8Srie }
149a194faf8Srie 
1507c478bd9Sstevel@tonic-gate /*
151a194faf8Srie  * Return an initialized Str_tbl - returns NULL on failure.
1527c478bd9Sstevel@tonic-gate  *
153a194faf8Srie  * flags:
154a194faf8Srie  *	FLG_STTAB_COMPRESS - build a compressed string table
1557c478bd9Sstevel@tonic-gate  */
1567c478bd9Sstevel@tonic-gate Str_tbl *
157a194faf8Srie st_new(uint_t flags)
1587c478bd9Sstevel@tonic-gate {
1597c478bd9Sstevel@tonic-gate 	Str_tbl	*stp;
1607c478bd9Sstevel@tonic-gate 
161*6b3ba5bdSAli Bahrami 	if ((stp = calloc(sizeof (*stp), 1)) == NULL)
162a194faf8Srie 		return (NULL);
1637c478bd9Sstevel@tonic-gate 
1647c478bd9Sstevel@tonic-gate 	/*
1657c478bd9Sstevel@tonic-gate 	 * Start with a leading '\0' - it's tradition.
1667c478bd9Sstevel@tonic-gate 	 */
167a194faf8Srie 	stp->st_strsize = stp->st_fullstrsize = stp->st_nextoff = 1;
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate 	/*
170a194faf8Srie 	 * Do we compress this string table?
1717c478bd9Sstevel@tonic-gate 	 */
172a194faf8Srie 	stp->st_flags = flags;
173a194faf8Srie 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
1747c478bd9Sstevel@tonic-gate 		return (stp);
1757c478bd9Sstevel@tonic-gate 
176*6b3ba5bdSAli Bahrami 	if ((stp->st_lentree = calloc(sizeof (*stp->st_lentree), 1)) == NULL)
177a194faf8Srie 		return (NULL);
178a194faf8Srie 
179a194faf8Srie 	avl_create(stp->st_lentree, &avl_len_compare, sizeof (LenNode),
180a194faf8Srie 	    SGSOFFSETOF(LenNode, ln_avlnode));
181a194faf8Srie 
182a194faf8Srie 	return (stp);
183a194faf8Srie }
184a194faf8Srie 
185a194faf8Srie /*
186a194faf8Srie  * Insert a new string into the Str_tbl.  There are two AVL trees used.
187a194faf8Srie  *
188*6b3ba5bdSAli Bahrami  *  -	The first LenNode AVL tree maintains a tree of nodes based on string
189a194faf8Srie  *	sizes.
190*6b3ba5bdSAli Bahrami  *  -	Each LenNode maintains a StrNode AVL tree for each string.  Large
191a194faf8Srie  *	applications have been known to contribute thousands of strings of
192a194faf8Srie  *	the same size.  Should strings need to be removed (-z ignore), then
193a194faf8Srie  *	the string AVL tree makes this removal efficient and scalable.
194a194faf8Srie  */
195a194faf8Srie int
196a194faf8Srie st_insert(Str_tbl *stp, const char *str)
197a194faf8Srie {
198cce0e03bSab196087 	size_t		len;
199a194faf8Srie 	StrNode		*snp, sn = { 0 };
200a194faf8Srie 	LenNode		*lnp, ln = { 0 };
201a194faf8Srie 	avl_index_t	where;
202a194faf8Srie 
203a194faf8Srie 	/*
204a194faf8Srie 	 * String table can't have been cooked
205a194faf8Srie 	 */
206a194faf8Srie 	assert((stp->st_flags & FLG_STTAB_COOKED) == 0);
207a194faf8Srie 
208a194faf8Srie 	/*
209a194faf8Srie 	 * Null strings always point to the head of the string
210a194faf8Srie 	 * table - no reason to keep searching.
211a194faf8Srie 	 */
212cce0e03bSab196087 	if ((len = strlen(str)) == 0)
213a194faf8Srie 		return (0);
214a194faf8Srie 
215a194faf8Srie 	stp->st_fullstrsize += len + 1;
216a194faf8Srie 	stp->st_strcnt++;
217a194faf8Srie 
218a194faf8Srie 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
219a194faf8Srie 		return (0);
220a194faf8Srie 
221a194faf8Srie 	/*
222a194faf8Srie 	 * From the controlling string table, determine which LenNode AVL node
223a194faf8Srie 	 * provides for this string length.  If the node doesn't exist, insert
224a194faf8Srie 	 * a new node to represent this string length.
225a194faf8Srie 	 */
226a194faf8Srie 	ln.ln_strlen = len;
227a194faf8Srie 	if ((lnp = avl_find(stp->st_lentree, &ln, &where)) == NULL) {
228*6b3ba5bdSAli Bahrami 		if ((lnp = calloc(sizeof (*lnp), 1)) == NULL)
229a194faf8Srie 			return (-1);
230a194faf8Srie 		lnp->ln_strlen = len;
231a194faf8Srie 		avl_insert(stp->st_lentree, lnp, where);
232a194faf8Srie 
233*6b3ba5bdSAli Bahrami 		if ((lnp->ln_strtree = calloc(sizeof (*lnp->ln_strtree), 1)) ==
234*6b3ba5bdSAli Bahrami 		    NULL)
235a194faf8Srie 			return (0);
236a194faf8Srie 
237a194faf8Srie 		avl_create(lnp->ln_strtree, &avl_str_compare, sizeof (StrNode),
238a194faf8Srie 		    SGSOFFSETOF(StrNode, sn_avlnode));
239a194faf8Srie 	}
240a194faf8Srie 
241a194faf8Srie 	/*
242a194faf8Srie 	 * From the string length AVL node determine whether a StrNode AVL node
243a194faf8Srie 	 * provides this string.  If the node doesn't exist, insert a new node
244a194faf8Srie 	 * to represent this string.
245a194faf8Srie 	 */
246a194faf8Srie 	sn.sn_str = str;
247a194faf8Srie 	if ((snp = avl_find(lnp->ln_strtree, &sn, &where)) == NULL) {
248*6b3ba5bdSAli Bahrami 		if ((snp = calloc(sizeof (*snp), 1)) == NULL)
249a194faf8Srie 			return (-1);
250a194faf8Srie 		snp->sn_str = str;
251a194faf8Srie 		avl_insert(lnp->ln_strtree, snp, where);
252a194faf8Srie 	}
253a194faf8Srie 	snp->sn_refcnt++;
254a194faf8Srie 
2557c478bd9Sstevel@tonic-gate 	return (0);
2567c478bd9Sstevel@tonic-gate }
2577c478bd9Sstevel@tonic-gate 
258a194faf8Srie /*
259a194faf8Srie  * Remove a previously inserted string from the Str_tbl.
260a194faf8Srie  */
261a194faf8Srie int
262a194faf8Srie st_delstring(Str_tbl *stp, const char *str)
263a194faf8Srie {
264cce0e03bSab196087 	size_t		len;
265a194faf8Srie 	LenNode		*lnp, ln = { 0 };
266a194faf8Srie 	StrNode		*snp, sn = { 0 };
2677c478bd9Sstevel@tonic-gate 
268a194faf8Srie 	/*
269a194faf8Srie 	 * String table can't have been cooked
270a194faf8Srie 	 */
271a194faf8Srie 	assert((stp->st_flags & FLG_STTAB_COOKED) == 0);
272a194faf8Srie 
273cce0e03bSab196087 	len = strlen(str);
274a194faf8Srie 	stp->st_fullstrsize -= len + 1;
275a194faf8Srie 
276a194faf8Srie 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
277a194faf8Srie 		return (0);
278a194faf8Srie 
279a194faf8Srie 	/*
280a194faf8Srie 	 * Determine which LenNode AVL node provides for this string length.
281a194faf8Srie 	 */
282a194faf8Srie 	ln.ln_strlen = len;
283a194faf8Srie 	if ((lnp = avl_find(stp->st_lentree, &ln, 0)) != NULL) {
284a194faf8Srie 		sn.sn_str = str;
285a194faf8Srie 		if ((snp = avl_find(lnp->ln_strtree, &sn, 0)) != NULL) {
286a194faf8Srie 			/*
287a194faf8Srie 			 * Reduce the reference count, and if zero remove the
288a194faf8Srie 			 * node.
289a194faf8Srie 			 */
290a194faf8Srie 			if (--snp->sn_refcnt == 0)
291a194faf8Srie 				avl_remove(lnp->ln_strtree, snp);
292a194faf8Srie 			return (0);
293a194faf8Srie 		}
294a194faf8Srie 	}
295a194faf8Srie 
296a194faf8Srie 	/*
297a194faf8Srie 	 * No strings of this length, or no string itself - someone goofed.
298a194faf8Srie 	 */
299a194faf8Srie 	return (-1);
3007c478bd9Sstevel@tonic-gate }
3017c478bd9Sstevel@tonic-gate 
3027c478bd9Sstevel@tonic-gate /*
3037c478bd9Sstevel@tonic-gate  * Tear down a String_Table structure.
3047c478bd9Sstevel@tonic-gate  */
3057c478bd9Sstevel@tonic-gate void
3067c478bd9Sstevel@tonic-gate st_destroy(Str_tbl *stp)
3077c478bd9Sstevel@tonic-gate {
3087c478bd9Sstevel@tonic-gate 	Str_hash	*sthash, *psthash;
3097c478bd9Sstevel@tonic-gate 	Str_master	*mstr, *pmstr;
3107c478bd9Sstevel@tonic-gate 	uint_t		i;
3117c478bd9Sstevel@tonic-gate 
3127c478bd9Sstevel@tonic-gate 	/*
3137c478bd9Sstevel@tonic-gate 	 * cleanup the master strings
3147c478bd9Sstevel@tonic-gate 	 */
3157c478bd9Sstevel@tonic-gate 	for (mstr = stp->st_mstrlist, pmstr = 0; mstr;
3167c478bd9Sstevel@tonic-gate 	    mstr = mstr->sm_next) {
3177c478bd9Sstevel@tonic-gate 		if (pmstr)
3187c478bd9Sstevel@tonic-gate 			free(pmstr);
3197c478bd9Sstevel@tonic-gate 		pmstr = mstr;
3207c478bd9Sstevel@tonic-gate 	}
3217c478bd9Sstevel@tonic-gate 	if (pmstr)
3227c478bd9Sstevel@tonic-gate 		free(pmstr);
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate 	if (stp->st_hashbcks) {
3257c478bd9Sstevel@tonic-gate 		for (i = 0; i < stp->st_hbckcnt; i++) {
3267c478bd9Sstevel@tonic-gate 			for (sthash = stp->st_hashbcks[i], psthash = 0;
3277c478bd9Sstevel@tonic-gate 			    sthash; sthash = sthash->hi_next) {
3287c478bd9Sstevel@tonic-gate 				if (psthash)
3297c478bd9Sstevel@tonic-gate 					free(psthash);
3307c478bd9Sstevel@tonic-gate 				psthash = sthash;
3317c478bd9Sstevel@tonic-gate 			}
3327c478bd9Sstevel@tonic-gate 			if (psthash)
3337c478bd9Sstevel@tonic-gate 				free(psthash);
3347c478bd9Sstevel@tonic-gate 		}
3357c478bd9Sstevel@tonic-gate 		free(stp->st_hashbcks);
3367c478bd9Sstevel@tonic-gate 	}
3377c478bd9Sstevel@tonic-gate 	free(stp);
3387c478bd9Sstevel@tonic-gate }
3397c478bd9Sstevel@tonic-gate 
3407c478bd9Sstevel@tonic-gate 
3417c478bd9Sstevel@tonic-gate /*
3427c478bd9Sstevel@tonic-gate  * For a given string - copy it into the buffer associated with
3437c478bd9Sstevel@tonic-gate  * the string table - and return the offset it has been assigned.
3447c478bd9Sstevel@tonic-gate  *
3457c478bd9Sstevel@tonic-gate  * If a value of '-1' is returned - the string was not found in
3467c478bd9Sstevel@tonic-gate  * the Str_tbl.
3477c478bd9Sstevel@tonic-gate  */
3487c478bd9Sstevel@tonic-gate int
349cce0e03bSab196087 st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
3507c478bd9Sstevel@tonic-gate {
351cce0e03bSab196087 	size_t		stlen;
3527c478bd9Sstevel@tonic-gate 	uint_t		hashval;
3537c478bd9Sstevel@tonic-gate 	Str_hash	*sthash;
3547c478bd9Sstevel@tonic-gate 	Str_master	*mstr;
3557c478bd9Sstevel@tonic-gate 	int		i;
3567c478bd9Sstevel@tonic-gate 
3577c478bd9Sstevel@tonic-gate 	/*
3587c478bd9Sstevel@tonic-gate 	 * String table *must* have been previously cooked
3597c478bd9Sstevel@tonic-gate 	 */
3607c478bd9Sstevel@tonic-gate 	assert(stp->st_strbuf);
3617c478bd9Sstevel@tonic-gate 
3627c478bd9Sstevel@tonic-gate 	assert(stp->st_flags & FLG_STTAB_COOKED);
363cce0e03bSab196087 	stlen = strlen(str);
3647c478bd9Sstevel@tonic-gate 	/*
3657c478bd9Sstevel@tonic-gate 	 * Null string always points to head of string table
3667c478bd9Sstevel@tonic-gate 	 */
3677c478bd9Sstevel@tonic-gate 	if (stlen == 0) {
3687c478bd9Sstevel@tonic-gate 		*stoff = 0;
3697c478bd9Sstevel@tonic-gate 		return (0);
3707c478bd9Sstevel@tonic-gate 	}
3717c478bd9Sstevel@tonic-gate 
3727c478bd9Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
373cce0e03bSab196087 		size_t		_stoff;
3747c478bd9Sstevel@tonic-gate 
3757c478bd9Sstevel@tonic-gate 		stlen++;	/* count for trailing '\0' */
3767c478bd9Sstevel@tonic-gate 		_stoff = stp->st_nextoff;
3777c478bd9Sstevel@tonic-gate 		/*
3787c478bd9Sstevel@tonic-gate 		 * Have we overflowed our assigned buffer?
3797c478bd9Sstevel@tonic-gate 		 */
380a194faf8Srie 		if ((_stoff + stlen) > stp->st_fullstrsize)
3817c478bd9Sstevel@tonic-gate 			return (-1);
3827c478bd9Sstevel@tonic-gate 		memcpy(stp->st_strbuf + _stoff, str, stlen);
3837c478bd9Sstevel@tonic-gate 		*stoff = _stoff;
3847c478bd9Sstevel@tonic-gate 		stp->st_nextoff += stlen;
3857c478bd9Sstevel@tonic-gate 		return (0);
3867c478bd9Sstevel@tonic-gate 	}
3877c478bd9Sstevel@tonic-gate 
3887c478bd9Sstevel@tonic-gate 	/*
389a194faf8Srie 	 * Calculate reverse hash for string.
3907c478bd9Sstevel@tonic-gate 	 */
3917c478bd9Sstevel@tonic-gate 	hashval = HASHSEED;
3927c478bd9Sstevel@tonic-gate 	for (i = stlen; i >= 0; i--) {
3937c478bd9Sstevel@tonic-gate 		hashval = ((hashval << 5) + hashval) +
3947c478bd9Sstevel@tonic-gate 		    str[i];			/* h = ((h * 33) + c) */
3957c478bd9Sstevel@tonic-gate 	}
3967c478bd9Sstevel@tonic-gate 
3977c478bd9Sstevel@tonic-gate 	for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash;
3987c478bd9Sstevel@tonic-gate 	    sthash = sthash->hi_next) {
3997c478bd9Sstevel@tonic-gate 		const char	*hstr;
4007c478bd9Sstevel@tonic-gate 
401a194faf8Srie 		if (sthash->hi_hashval != hashval)
402a194faf8Srie 			continue;
403a194faf8Srie 
404a194faf8Srie 		hstr = &sthash->hi_mstr->sm_str[sthash->hi_mstr->sm_strlen -
405a194faf8Srie 		    sthash->hi_strlen];
406a194faf8Srie 		if (strcmp(str, hstr) == 0)
4077c478bd9Sstevel@tonic-gate 			break;
4087c478bd9Sstevel@tonic-gate 	}
4097c478bd9Sstevel@tonic-gate 
4107c478bd9Sstevel@tonic-gate 	/*
4117c478bd9Sstevel@tonic-gate 	 * Did we find the string?
4127c478bd9Sstevel@tonic-gate 	 */
4137c478bd9Sstevel@tonic-gate 	if (sthash == 0)
4147c478bd9Sstevel@tonic-gate 		return (-1);
4157c478bd9Sstevel@tonic-gate 
4167c478bd9Sstevel@tonic-gate 	/*
4177c478bd9Sstevel@tonic-gate 	 * Has this string been copied into the string table?
4187c478bd9Sstevel@tonic-gate 	 */
4197c478bd9Sstevel@tonic-gate 	mstr = sthash->hi_mstr;
420a194faf8Srie 	if (mstr->sm_stroff == 0) {
421cce0e03bSab196087 		size_t	mstrlen = mstr->sm_strlen + 1;
422a194faf8Srie 
423a194faf8Srie 		mstr->sm_stroff = stp->st_nextoff;
424a194faf8Srie 
4257c478bd9Sstevel@tonic-gate 		/*
4267c478bd9Sstevel@tonic-gate 		 * Have we overflowed our assigned buffer?
4277c478bd9Sstevel@tonic-gate 		 */
428a194faf8Srie 		if ((mstr->sm_stroff + mstrlen) > stp->st_fullstrsize)
4297c478bd9Sstevel@tonic-gate 			return (-1);
430a194faf8Srie 
431a194faf8Srie 		(void) memcpy(stp->st_strbuf + mstr->sm_stroff,
432a194faf8Srie 		    mstr->sm_str, mstrlen);
433a194faf8Srie 		stp->st_nextoff += mstrlen;
4347c478bd9Sstevel@tonic-gate 	}
435a194faf8Srie 
4367c478bd9Sstevel@tonic-gate 	/*
437a194faf8Srie 	 * Calculate offset of (sub)string.
4387c478bd9Sstevel@tonic-gate 	 */
439a194faf8Srie 	*stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen;
4407c478bd9Sstevel@tonic-gate 
4417c478bd9Sstevel@tonic-gate 	return (0);
4427c478bd9Sstevel@tonic-gate }
4437c478bd9Sstevel@tonic-gate 
4447c478bd9Sstevel@tonic-gate 
4457c478bd9Sstevel@tonic-gate static int
446cce0e03bSab196087 st_hash_insert(Str_tbl *stp, const char *str, size_t len)
4477c478bd9Sstevel@tonic-gate {
4487c478bd9Sstevel@tonic-gate 	int		i;
4497c478bd9Sstevel@tonic-gate 	uint_t		hashval = HASHSEED;
4507c478bd9Sstevel@tonic-gate 	uint_t		bckcnt = stp->st_hbckcnt;
4517c478bd9Sstevel@tonic-gate 	Str_hash	**hashbcks = stp->st_hashbcks;
4527c478bd9Sstevel@tonic-gate 	Str_hash	*sthash;
4537c478bd9Sstevel@tonic-gate 	Str_master	*mstr = 0;
4547c478bd9Sstevel@tonic-gate 
4557c478bd9Sstevel@tonic-gate 	/*
4567c478bd9Sstevel@tonic-gate 	 * We use a classic 'Bernstein k=33' hash function.  But
4577c478bd9Sstevel@tonic-gate 	 * instead of hashing from the start of the string to the
4587c478bd9Sstevel@tonic-gate 	 * end, we do it in reverse.
4597c478bd9Sstevel@tonic-gate 	 *
4607c478bd9Sstevel@tonic-gate 	 * This way - we are essentially building all of the
4617c478bd9Sstevel@tonic-gate 	 * suffix hashvalues as we go.  We can check to see if
4627c478bd9Sstevel@tonic-gate 	 * any suffixes already exist in the tree as we generate
4637c478bd9Sstevel@tonic-gate 	 * the hash.
4647c478bd9Sstevel@tonic-gate 	 */
465a194faf8Srie 	for (i = len; i >= 0; i--) {
4667c478bd9Sstevel@tonic-gate 		hashval = ((hashval << 5) + hashval) +
4677c478bd9Sstevel@tonic-gate 		    str[i];			/* h = ((h * 33) + c) */
468a194faf8Srie 
4697c478bd9Sstevel@tonic-gate 		for (sthash = hashbcks[hashval % bckcnt];
4707c478bd9Sstevel@tonic-gate 		    sthash; sthash = sthash->hi_next) {
4717c478bd9Sstevel@tonic-gate 			const char	*hstr;
4727c478bd9Sstevel@tonic-gate 			Str_master	*_mstr;
4737c478bd9Sstevel@tonic-gate 
474a194faf8Srie 			if (sthash->hi_hashval != hashval)
475a194faf8Srie 				continue;
476a194faf8Srie 
4777c478bd9Sstevel@tonic-gate 			_mstr = sthash->hi_mstr;
478a194faf8Srie 			hstr = &_mstr->sm_str[_mstr->sm_strlen -
479a194faf8Srie 			    sthash->hi_strlen];
480a194faf8Srie 
481a194faf8Srie 			if (strcmp(&str[i], hstr))
482a194faf8Srie 				continue;
483a194faf8Srie 
4847c478bd9Sstevel@tonic-gate 			if (i == 0) {
4857c478bd9Sstevel@tonic-gate 				/*
486a194faf8Srie 				 * Entry already in table, increment refcnt and
487a194faf8Srie 				 * get out.
4887c478bd9Sstevel@tonic-gate 				 */
4897c478bd9Sstevel@tonic-gate 				sthash->hi_refcnt++;
4907c478bd9Sstevel@tonic-gate 				return (0);
4917c478bd9Sstevel@tonic-gate 			} else {
4927c478bd9Sstevel@tonic-gate 				/*
493a194faf8Srie 				 * If this 'suffix' is presently a 'master
494a194faf8Srie 				 * string, then take over it's record.
4957c478bd9Sstevel@tonic-gate 				 */
496a194faf8Srie 				if (sthash->hi_strlen == _mstr->sm_strlen) {
4977c478bd9Sstevel@tonic-gate 					/*
498a194faf8Srie 					 * we should only do this once.
4997c478bd9Sstevel@tonic-gate 					 */
5007c478bd9Sstevel@tonic-gate 					assert(mstr == 0);
5017c478bd9Sstevel@tonic-gate 					mstr = _mstr;
5027c478bd9Sstevel@tonic-gate 				}
5037c478bd9Sstevel@tonic-gate 			}
5047c478bd9Sstevel@tonic-gate 		}
5057c478bd9Sstevel@tonic-gate 	}
5067c478bd9Sstevel@tonic-gate 
5077c478bd9Sstevel@tonic-gate 	/*
5087c478bd9Sstevel@tonic-gate 	 * Do we need a new master string, or can we take over
5097c478bd9Sstevel@tonic-gate 	 * one we already found in the table?
5107c478bd9Sstevel@tonic-gate 	 */
5117c478bd9Sstevel@tonic-gate 	if (mstr == 0) {
5127c478bd9Sstevel@tonic-gate 		/*
5137c478bd9Sstevel@tonic-gate 		 * allocate a new master string
5147c478bd9Sstevel@tonic-gate 		 */
515*6b3ba5bdSAli Bahrami 		if ((mstr = calloc(sizeof (*mstr), 1)) == 0)
5167c478bd9Sstevel@tonic-gate 			return (-1);
5177c478bd9Sstevel@tonic-gate 		mstr->sm_next = stp->st_mstrlist;
5187c478bd9Sstevel@tonic-gate 		stp->st_mstrlist = mstr;
519a194faf8Srie 		stp->st_strsize += len + 1;
5207c478bd9Sstevel@tonic-gate 	} else {
5217c478bd9Sstevel@tonic-gate 		/*
522a194faf8Srie 		 * We are taking over a existing master string, the string size
523a194faf8Srie 		 * only increments by the difference between the current string
524a194faf8Srie 		 * and the previous master.
5257c478bd9Sstevel@tonic-gate 		 */
526a194faf8Srie 		assert(len > mstr->sm_strlen);
527a194faf8Srie 		stp->st_strsize += len - mstr->sm_strlen;
5287c478bd9Sstevel@tonic-gate 	}
5297c478bd9Sstevel@tonic-gate 
530*6b3ba5bdSAli Bahrami 	if ((sthash = calloc(sizeof (*sthash), 1)) == 0)
5317c478bd9Sstevel@tonic-gate 		return (-1);
5327c478bd9Sstevel@tonic-gate 
5337c478bd9Sstevel@tonic-gate 	mstr->sm_hashval = sthash->hi_hashval = hashval;
534a194faf8Srie 	mstr->sm_strlen = sthash->hi_strlen = len;
5357c478bd9Sstevel@tonic-gate 	mstr->sm_str = str;
5367c478bd9Sstevel@tonic-gate 	sthash->hi_refcnt = 1;
5377c478bd9Sstevel@tonic-gate 	sthash->hi_mstr = mstr;
5387c478bd9Sstevel@tonic-gate 
5397c478bd9Sstevel@tonic-gate 	/*
5407c478bd9Sstevel@tonic-gate 	 * Insert string element into head of hash list
5417c478bd9Sstevel@tonic-gate 	 */
5427c478bd9Sstevel@tonic-gate 	hashval = hashval % bckcnt;
5437c478bd9Sstevel@tonic-gate 	sthash->hi_next = hashbcks[hashval];
5447c478bd9Sstevel@tonic-gate 	hashbcks[hashval] = sthash;
5457c478bd9Sstevel@tonic-gate 	return (0);
5467c478bd9Sstevel@tonic-gate }
5477c478bd9Sstevel@tonic-gate 
5487c478bd9Sstevel@tonic-gate /*
5497c478bd9Sstevel@tonic-gate  * Return amount of space required for the string table.
5507c478bd9Sstevel@tonic-gate  */
551cce0e03bSab196087 size_t
5527c478bd9Sstevel@tonic-gate st_getstrtab_sz(Str_tbl *stp)
5537c478bd9Sstevel@tonic-gate {
554a194faf8Srie 	assert(stp->st_fullstrsize > 0);
5557c478bd9Sstevel@tonic-gate 
5567c478bd9Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
5577c478bd9Sstevel@tonic-gate 		stp->st_flags |= FLG_STTAB_COOKED;
558a194faf8Srie 		return (stp->st_fullstrsize);
5597c478bd9Sstevel@tonic-gate 	}
5607c478bd9Sstevel@tonic-gate 
5617c478bd9Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COOKED) == 0) {
562a194faf8Srie 		LenNode		*lnp;
5637c478bd9Sstevel@tonic-gate 		void		*cookie;
5647c478bd9Sstevel@tonic-gate 
5657c478bd9Sstevel@tonic-gate 		stp->st_flags |= FLG_STTAB_COOKED;
5667c478bd9Sstevel@tonic-gate 		/*
5677c478bd9Sstevel@tonic-gate 		 * allocate a hash table about the size of # of
5687c478bd9Sstevel@tonic-gate 		 * strings input.
5697c478bd9Sstevel@tonic-gate 		 */
570a194faf8Srie 		stp->st_hbckcnt = findprime(stp->st_strcnt);
571*6b3ba5bdSAli Bahrami 		if ((stp->st_hashbcks = calloc(sizeof (*stp->st_hashbcks),
572*6b3ba5bdSAli Bahrami 		    stp->st_hbckcnt)) == NULL)
5737c478bd9Sstevel@tonic-gate 			return (0);
5747c478bd9Sstevel@tonic-gate 
5757c478bd9Sstevel@tonic-gate 		/*
576a194faf8Srie 		 * We now walk all of the strings in the list, from shortest to
577a194faf8Srie 		 * longest, and insert them into the hashtable.
5787c478bd9Sstevel@tonic-gate 		 */
579a194faf8Srie 		if ((lnp = avl_first(stp->st_lentree)) == NULL) {
5807c478bd9Sstevel@tonic-gate 			/*
581a194faf8Srie 			 * Is it possible we have an empty string table, if so,
582a194faf8Srie 			 * the table still contains '\0', so return the size.
5837c478bd9Sstevel@tonic-gate 			 */
584a194faf8Srie 			if (avl_numnodes(stp->st_lentree) == 0) {
585a194faf8Srie 				assert(stp->st_strsize == 1);
586a194faf8Srie 				return (stp->st_strsize);
5877c478bd9Sstevel@tonic-gate 			}
5887c478bd9Sstevel@tonic-gate 			return (0);
5897c478bd9Sstevel@tonic-gate 		}
590a194faf8Srie 
591a194faf8Srie 		while (lnp) {
592a194faf8Srie 			StrNode	*snp;
5937c478bd9Sstevel@tonic-gate 
5947c478bd9Sstevel@tonic-gate 			/*
595a194faf8Srie 			 * Walk the string lists and insert them into the hash
596a194faf8Srie 			 * list.  Once a string is inserted we no longer need
597a194faf8Srie 			 * it's entry, so the string can be freed.
5987c478bd9Sstevel@tonic-gate 			 */
599a194faf8Srie 			for (snp = avl_first(lnp->ln_strtree); snp;
600a194faf8Srie 			    snp = AVL_NEXT(lnp->ln_strtree, snp)) {
601a194faf8Srie 				if (st_hash_insert(stp, snp->sn_str,
602a194faf8Srie 				    lnp->ln_strlen) == -1)
6037c478bd9Sstevel@tonic-gate 					return (0);
6047c478bd9Sstevel@tonic-gate 			}
6057c478bd9Sstevel@tonic-gate 
6067c478bd9Sstevel@tonic-gate 			/*
607a194faf8Srie 			 * Now that the strings have been copied, walk the
608a194faf8Srie 			 * StrNode tree and free all the AVL nodes.  Note,
609a194faf8Srie 			 * avl_destroy_nodes() beats avl_remove() as the
610a194faf8Srie 			 * latter balances the nodes as they are removed.
611a194faf8Srie 			 * We just want to tear the whole thing down fast.
6127c478bd9Sstevel@tonic-gate 			 */
6137c478bd9Sstevel@tonic-gate 			cookie = NULL;
614a194faf8Srie 			while ((snp = avl_destroy_nodes(lnp->ln_strtree,
6157c478bd9Sstevel@tonic-gate 			    &cookie)) != NULL)
616a194faf8Srie 				free(snp);
617a194faf8Srie 			avl_destroy(lnp->ln_strtree);
618a194faf8Srie 			free(lnp->ln_strtree);
619a194faf8Srie 			lnp->ln_strtree = NULL;
6207c478bd9Sstevel@tonic-gate 
621a194faf8Srie 			/*
622a194faf8Srie 			 * Move on to the next LenNode.
623a194faf8Srie 			 */
624a194faf8Srie 			lnp = AVL_NEXT(stp->st_lentree, lnp);
6257c478bd9Sstevel@tonic-gate 		}
6267c478bd9Sstevel@tonic-gate 
6277c478bd9Sstevel@tonic-gate 		/*
628a194faf8Srie 		 * Now that all of the strings have been freed, walk the
629a194faf8Srie 		 * LenNode tree and free all of the AVL nodes.  Note,
630a194faf8Srie 		 * avl_destroy_nodes() beats avl_remove() as the latter
631a194faf8Srie 		 * balances the nodes as they are removed. We just want to
632a194faf8Srie 		 * tear the whole thing down fast.
633a194faf8Srie 		 */
634a194faf8Srie 		cookie = NULL;
635a194faf8Srie 		while ((lnp = avl_destroy_nodes(stp->st_lentree,
636a194faf8Srie 		    &cookie)) != NULL)
637a194faf8Srie 			free(lnp);
638a194faf8Srie 		avl_destroy(stp->st_lentree);
639a194faf8Srie 		free(stp->st_lentree);
640a194faf8Srie 		stp->st_lentree = 0;
641a194faf8Srie 	}
642a194faf8Srie 
643a194faf8Srie 	assert(stp->st_strsize > 0);
644a194faf8Srie 	assert(stp->st_fullstrsize >= stp->st_strsize);
645a194faf8Srie 
646a194faf8Srie 	return (stp->st_strsize);
647a194faf8Srie }
648a194faf8Srie 
649a194faf8Srie /*
650a194faf8Srie  * Associate a buffer with a string table.
6517c478bd9Sstevel@tonic-gate  */
6527c478bd9Sstevel@tonic-gate const char *
6537c478bd9Sstevel@tonic-gate st_getstrbuf(Str_tbl *stp)
6547c478bd9Sstevel@tonic-gate {
6557c478bd9Sstevel@tonic-gate 	return (stp->st_strbuf);
6567c478bd9Sstevel@tonic-gate }
6577c478bd9Sstevel@tonic-gate 
6587c478bd9Sstevel@tonic-gate int
659cce0e03bSab196087 st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize)
6607c478bd9Sstevel@tonic-gate {
6617c478bd9Sstevel@tonic-gate 	assert(stp->st_flags & FLG_STTAB_COOKED);
6627c478bd9Sstevel@tonic-gate 
6637c478bd9Sstevel@tonic-gate 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
664a194faf8Srie 		if (bufsize < stp->st_fullstrsize)
6657c478bd9Sstevel@tonic-gate 			return (-1);
6667c478bd9Sstevel@tonic-gate 	} else {
667a194faf8Srie 		if (bufsize < stp->st_strsize)
6687c478bd9Sstevel@tonic-gate 			return (-1);
6697c478bd9Sstevel@tonic-gate 	}
6707c478bd9Sstevel@tonic-gate 
6717c478bd9Sstevel@tonic-gate 	stp->st_strbuf = stbuf;
6727c478bd9Sstevel@tonic-gate #ifdef	DEBUG
6737c478bd9Sstevel@tonic-gate 	/*
6747c478bd9Sstevel@tonic-gate 	 * for debug builds - start with a stringtable filled in
675*6b3ba5bdSAli Bahrami 	 * with '0xff'.  This makes it very easy to spot unfilled
676*6b3ba5bdSAli Bahrami 	 * holes in the strtab.
6777c478bd9Sstevel@tonic-gate 	 */
6787c478bd9Sstevel@tonic-gate 	memset(stbuf, 0xff, bufsize);
6797c478bd9Sstevel@tonic-gate 	stbuf[0] = '\0';
6807c478bd9Sstevel@tonic-gate #else
6817c478bd9Sstevel@tonic-gate 	memset(stbuf, 0x0, bufsize);
6827c478bd9Sstevel@tonic-gate #endif
6837c478bd9Sstevel@tonic-gate 	return (0);
6847c478bd9Sstevel@tonic-gate }
685