xref: /illumos-gate/usr/src/cmd/sgs/common/string_table.c (revision 8119dad84d6416f13557b0ba8e2aaf9064cbcfd3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <_string_table.h>
28 #include <strings.h>
29 #include <sgs.h>
30 #include <stdio.h>
31 
32 /*
33  * This file provides the interfaces to build a Str_tbl suitable for use by
34  * either the sgsmsg message system, or a standard ELF string table (SHT_STRTAB)
35  * as created by ld(1).
36  *
37  * There are two modes which can be used when constructing a string table:
38  *
39  *	st_new(0)
40  *		standard string table - no compression.  This is the
41  *		traditional, fast method.
42  *
43  *	st_new(FLG_STTAB_COMPRESS)
44  *		builds a compressed string table which both eliminates
45  *		duplicate strings, and permits strings with common suffixes
46  *		(atexit vs. exit) to overlap in the table.  This provides space
47  *		savings for many string tables.  Although more work than the
48  *		traditional method, the algorithms used are designed to scale
49  *		and keep any overhead at a minimum.
50  *
51  * These string tables are built with a common interface in a two-pass manner.
52  * The first pass finds all of the strings required for the string-table and
53  * calculates the size required for the final string table.
54  *
55  * The second pass allocates the string table, populates the strings into the
56  * table and returns the offsets the strings have been assigned.
57  *
58  * The calling sequence to build and populate a string table is:
59  *
60  *		st_new();		// initialize strtab
61  *
62  *		st_insert(st1);		// first pass of strings ...
63  *					// calculates size required for
64  *					// string table
65  *
66  *		st_delstring(st?);	// remove string previously
67  *					// inserted
68  *		st_insert(stN);
69  *
70  *		st_getstrtab_sz();	// freezes strtab and computes
71  *					// size of table.
72  *
73  *		st_setstrbuf();		// associates a final destination
74  *					// for the string table
75  *
76  *		st_setstring(st1);	// populate the string table
77  *		...			// offsets are based off of second
78  *					// pass	through the string table
79  *		st_setstring(stN);
80  *
81  *		st_destroy();		// tear down string table
82  *					// structures.
83  *
84  * String Suffix Compression Algorithm:
85  *
86  *   Here's a quick high level overview of the Suffix String
87  *   compression algorithm used.  First - the heart of the algorithm
88  *   is a Hash table list which represents a dictionary of all unique
89  *   strings inserted into the string table.  The hash function for
90  *   this table is a standard string hash except that the hash starts
91  *   at the last character in the string (&str[n - 1]) and works towards
92  *   the first character in the function (&str[0]).  As we compute the
93  *   HASH value for a given string, we also compute the hash values
94  *   for all of the possible suffix strings for that string.
95  *
96  *   As we compute the hash - at each character see if the current
97  *   suffix string for that hash is already present in the table.  If
98  *   it is, and the string is a master string.  Then change that
99  *   string to a suffix string of the new string being inserted.
100  *
101  *   When the final hash value is found (hash for str[0...n]), check
102  *   to see if it is in the hash table - if so increment the reference
103  *   count for the string.  If it is not yet in the table, insert a
104  *   new hash table entry for a master string.
105  *
106  *   The above method will find all suffixes of a given string given
107  *   that the strings are inserted from shortest to longest.  That is
108  *   why this is a two phase method, we first collect all of the
109  *   strings and store them based off of their length in an AVL tree.
110  *   Once all of the strings have been submitted we then start the
111  *   hash table build by traversing the AVL tree in order and
112  *   inserting the strings from shortest to longest as described
113  *   above.
114  */
115 
116 /* LINTLIBRARY */
117 
118 static int
119 avl_len_compare(const void *n1, const void *n2)
120 {
121 	size_t	len1, len2;
122 
123 	len1 = ((LenNode *)n1)->ln_strlen;
124 	len2 = ((LenNode *)n2)->ln_strlen;
125 
126 	if (len1 == len2)
127 		return (0);
128 	if (len2 < len1)
129 		return (1);
130 	return (-1);
131 }
132 
133 static int
134 avl_str_compare(const void *n1, const void *n2)
135 {
136 	const char	*str1, *str2;
137 	int		rc;
138 
139 	str1 = ((StrNode *)n1)->sn_str;
140 	str2 = ((StrNode *)n2)->sn_str;
141 
142 	rc = strcmp(str1, str2);
143 	if (rc > 0)
144 		return (1);
145 	if (rc < 0)
146 		return (-1);
147 	return (0);
148 }
149 
150 /*
151  * Return an initialized Str_tbl - returns NULL on failure.
152  *
153  * flags:
154  *	FLG_STTAB_COMPRESS - build a compressed string table
155  */
156 Str_tbl *
157 st_new(uint_t flags)
158 {
159 	Str_tbl	*stp;
160 
161 	if ((stp = calloc(1, sizeof (*stp))) == NULL)
162 		return (NULL);
163 
164 	/*
165 	 * Start with a leading '\0' - it's tradition.
166 	 */
167 	stp->st_strsize = stp->st_fullstrsize = stp->st_nextoff = 1;
168 
169 	/*
170 	 * Do we compress this string table?
171 	 */
172 	stp->st_flags = flags;
173 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
174 		return (stp);
175 
176 	if ((stp->st_lentree = calloc(1, sizeof (*stp->st_lentree))) == NULL)
177 		return (NULL);
178 
179 	avl_create(stp->st_lentree, &avl_len_compare, sizeof (LenNode),
180 	    SGSOFFSETOF(LenNode, ln_avlnode));
181 
182 	return (stp);
183 }
184 
185 /*
186  * Insert a new string into the Str_tbl.  There are two AVL trees used.
187  *
188  *  -	The first LenNode AVL tree maintains a tree of nodes based on string
189  *	sizes.
190  *  -	Each LenNode maintains a StrNode AVL tree for each string.  Large
191  *	applications have been known to contribute thousands of strings of
192  *	the same size.  Should strings need to be removed (-z ignore), then
193  *	the string AVL tree makes this removal efficient and scalable.
194  */
195 int
196 st_insert(Str_tbl *stp, const char *str)
197 {
198 	size_t		len;
199 	StrNode		*snp, sn = { 0 };
200 	LenNode		*lnp, ln = { 0 };
201 	avl_index_t	where;
202 
203 	/*
204 	 * String table can't have been cooked
205 	 */
206 	assert((stp->st_flags & FLG_STTAB_COOKED) == 0);
207 
208 	/*
209 	 * Null strings always point to the head of the string
210 	 * table - no reason to keep searching.
211 	 */
212 	if ((len = strlen(str)) == 0)
213 		return (0);
214 
215 	stp->st_fullstrsize += len + 1;
216 	stp->st_strcnt++;
217 
218 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
219 		return (0);
220 
221 	/*
222 	 * From the controlling string table, determine which LenNode AVL node
223 	 * provides for this string length.  If the node doesn't exist, insert
224 	 * a new node to represent this string length.
225 	 */
226 	ln.ln_strlen = len;
227 	if ((lnp = avl_find(stp->st_lentree, &ln, &where)) == NULL) {
228 		if ((lnp = calloc(1, sizeof (*lnp))) == NULL)
229 			return (-1);
230 
231 		if ((lnp->ln_strtree = calloc(1, sizeof (*lnp->ln_strtree))) ==
232 		    NULL) {
233 			free(lnp);
234 			return (-1);
235 		}
236 
237 		lnp->ln_strlen = len;
238 		avl_insert(stp->st_lentree, lnp, where);
239 
240 		avl_create(lnp->ln_strtree, &avl_str_compare, sizeof (StrNode),
241 		    SGSOFFSETOF(StrNode, sn_avlnode));
242 	}
243 
244 	/*
245 	 * From the string length AVL node determine whether a StrNode AVL node
246 	 * provides this string.  If the node doesn't exist, insert a new node
247 	 * to represent this string.
248 	 */
249 	sn.sn_str = str;
250 	if ((snp = avl_find(lnp->ln_strtree, &sn, &where)) == NULL) {
251 		if ((snp = calloc(1, sizeof (*snp))) == NULL)
252 			return (-1);
253 		snp->sn_str = str;
254 		avl_insert(lnp->ln_strtree, snp, where);
255 	}
256 	snp->sn_refcnt++;
257 
258 	return (0);
259 }
260 
261 /*
262  * Remove a previously inserted string from the Str_tbl.
263  */
264 int
265 st_delstring(Str_tbl *stp, const char *str)
266 {
267 	size_t		len;
268 	LenNode		*lnp, ln = { 0 };
269 	StrNode		*snp, sn = { 0 };
270 
271 	/*
272 	 * String table can't have been cooked
273 	 */
274 	assert((stp->st_flags & FLG_STTAB_COOKED) == 0);
275 
276 	len = strlen(str);
277 	stp->st_fullstrsize -= len + 1;
278 
279 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0)
280 		return (0);
281 
282 	/*
283 	 * Determine which LenNode AVL node provides for this string length.
284 	 */
285 	ln.ln_strlen = len;
286 	if ((lnp = avl_find(stp->st_lentree, &ln, 0)) != NULL) {
287 		sn.sn_str = str;
288 		if ((snp = avl_find(lnp->ln_strtree, &sn, 0)) != NULL) {
289 			/*
290 			 * Reduce the reference count, and if zero remove the
291 			 * node.
292 			 */
293 			if (--snp->sn_refcnt == 0)
294 				avl_remove(lnp->ln_strtree, snp);
295 			return (0);
296 		}
297 	}
298 
299 	/*
300 	 * No strings of this length, or no string itself - someone goofed.
301 	 */
302 	return (-1);
303 }
304 
305 /*
306  * Tear down a String_Table structure.
307  */
308 void
309 st_destroy(Str_tbl *stp)
310 {
311 	Str_hash	*sthash, *psthash;
312 	Str_master	*mstr, *pmstr;
313 	uint_t		i;
314 
315 	/*
316 	 * cleanup the master strings
317 	 */
318 	for (mstr = stp->st_mstrlist, pmstr = 0; mstr;
319 	    mstr = mstr->sm_next) {
320 		if (pmstr)
321 			free(pmstr);
322 		pmstr = mstr;
323 	}
324 	if (pmstr)
325 		free(pmstr);
326 
327 	if (stp->st_hashbcks) {
328 		for (i = 0; i < stp->st_hbckcnt; i++) {
329 			for (sthash = stp->st_hashbcks[i], psthash = 0;
330 			    sthash; sthash = sthash->hi_next) {
331 				if (psthash)
332 					free(psthash);
333 				psthash = sthash;
334 			}
335 			if (psthash)
336 				free(psthash);
337 		}
338 		free(stp->st_hashbcks);
339 	}
340 	free(stp);
341 }
342 
343 /*
344  * Hash a single additional character into hashval, separately so we can
345  * iteratively get suffix hashes.  See st_string_hash and st_hash_insert
346  */
347 static inline uint_t
348 st_string_hashround(uint_t hashval, char c)
349 {
350 	/* h = ((h * 33) + c) */
351 	return (((hashval << 5) + hashval) + c);
352 }
353 
354 /*
355  * We use a classic 'Bernstein k=33' hash function.  But
356  * instead of hashing from the start of the string to the
357  * end, we do it in reverse.
358  *
359  * This way we are essentially building all of the
360  * suffix hashvalues as we go.  We can check to see if
361  * any suffixes already exist in the tree as we generate
362  * the hash.
363  */
364 static inline uint_t
365 st_string_hash(const char *str)
366 {
367 	uint_t hashval = HASHSEED;
368 	size_t stlen = strlen(str);
369 
370 	/* We should never be hashing the NUL string */
371 	assert(stlen > 0);
372 
373 	for (int i = stlen; i >= 0; i--) {
374 		assert(i <= stlen); /* not unsigned->signed truncated */
375 		hashval = st_string_hashround(hashval, str[i]);
376 	}
377 
378 	return (hashval);
379 }
380 
381 /*
382  * For a given string - copy it into the buffer associated with the string
383  * table - and return the offset it has been assigned in stoff.
384  *
385  * If a value of '-1' is returned - the string was not found in
386  * the Str_tbl.
387  */
388 int
389 st_setstring(Str_tbl *stp, const char *str, size_t *stoff)
390 {
391 	size_t		stlen;
392 	uint_t		hashval;
393 	Str_hash	*sthash;
394 	Str_master	*mstr;
395 
396 	/*
397 	 * String table *must* have been previously cooked
398 	 */
399 	assert(stp->st_strbuf != NULL);
400 
401 	assert(stp->st_flags & FLG_STTAB_COOKED);
402 	stlen = strlen(str);
403 	/*
404 	 * Null string always points to head of string table
405 	 */
406 	if (stlen == 0) {
407 		if (stoff != NULL)
408 			*stoff = 0;
409 		return (0);
410 	}
411 
412 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
413 		size_t		_stoff;
414 
415 		stlen++;	/* count for trailing '\0' */
416 		_stoff = stp->st_nextoff;
417 		/*
418 		 * Have we overflowed our assigned buffer?
419 		 */
420 		if ((_stoff + stlen) > stp->st_fullstrsize)
421 			return (-1);
422 		memcpy(stp->st_strbuf + _stoff, str, stlen);
423 		if (stoff != NULL)
424 			*stoff = _stoff;
425 		stp->st_nextoff += stlen;
426 		return (0);
427 	}
428 
429 	/*
430 	 * Calculate reverse hash for string.
431 	 */
432 	hashval = st_string_hash(str);
433 
434 	for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt]; sthash;
435 	    sthash = sthash->hi_next) {
436 		const char	*hstr;
437 
438 		if (sthash->hi_hashval != hashval)
439 			continue;
440 
441 		hstr = &sthash->hi_mstr->sm_str[sthash->hi_mstr->sm_strlen -
442 		    sthash->hi_strlen];
443 		if (strcmp(str, hstr) == 0)
444 			break;
445 	}
446 
447 	/*
448 	 * Did we find the string?
449 	 */
450 	if (sthash == 0)
451 		return (-1);
452 
453 	/*
454 	 * Has this string been copied into the string table?
455 	 */
456 	mstr = sthash->hi_mstr;
457 	if (mstr->sm_stroff == 0) {
458 		size_t	mstrlen = mstr->sm_strlen + 1;
459 
460 		mstr->sm_stroff = stp->st_nextoff;
461 
462 		/*
463 		 * Have we overflowed our assigned buffer?
464 		 */
465 		if ((mstr->sm_stroff + mstrlen) > stp->st_fullstrsize)
466 			return (-1);
467 
468 		(void) memcpy(stp->st_strbuf + mstr->sm_stroff,
469 		    mstr->sm_str, mstrlen);
470 		stp->st_nextoff += mstrlen;
471 	}
472 
473 	/*
474 	 * Calculate offset of (sub)string.
475 	 */
476 	if (stoff != NULL)
477 		*stoff = mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen;
478 
479 	return (0);
480 }
481 
482 static int
483 st_hash_insert(Str_tbl *stp, const char *str, size_t len)
484 {
485 	int		i;
486 	uint_t		hashval = HASHSEED;
487 	uint_t		bckcnt = stp->st_hbckcnt;
488 	Str_hash	**hashbcks = stp->st_hashbcks;
489 	Str_hash	*sthash;
490 	Str_master	*mstr = 0;
491 
492 	for (i = len; i >= 0; i--) {
493 		/*
494 		 * Build up 'hashval' character by character, so we always
495 		 * have the hash of the current string suffix
496 		 */
497 		hashval = st_string_hashround(hashval, str[i]);
498 
499 		for (sthash = hashbcks[hashval % bckcnt];
500 		    sthash; sthash = sthash->hi_next) {
501 			const char	*hstr;
502 			Str_master	*_mstr;
503 
504 			if (sthash->hi_hashval != hashval)
505 				continue;
506 
507 			_mstr = sthash->hi_mstr;
508 			hstr = &_mstr->sm_str[_mstr->sm_strlen -
509 			    sthash->hi_strlen];
510 
511 			if (strcmp(&str[i], hstr))
512 				continue;
513 
514 			if (i == 0) {
515 				/*
516 				 * Entry already in table, increment refcnt and
517 				 * get out.
518 				 */
519 				sthash->hi_refcnt++;
520 				return (0);
521 			} else {
522 				/*
523 				 * If this 'suffix' is presently a 'master
524 				 * string, then take over it's record.
525 				 */
526 				if (sthash->hi_strlen == _mstr->sm_strlen) {
527 					/*
528 					 * we should only do this once.
529 					 */
530 					assert(mstr == 0);
531 					mstr = _mstr;
532 				}
533 			}
534 		}
535 	}
536 
537 	/*
538 	 * Do we need a new master string, or can we take over
539 	 * one we already found in the table?
540 	 */
541 	if (mstr == 0) {
542 		/*
543 		 * allocate a new master string
544 		 */
545 		if ((mstr = calloc(1, sizeof (*mstr))) == NULL)
546 			return (-1);
547 		mstr->sm_next = stp->st_mstrlist;
548 		stp->st_mstrlist = mstr;
549 		stp->st_strsize += len + 1;
550 	} else {
551 		/*
552 		 * We are taking over a existing master string, the string size
553 		 * only increments by the difference between the current string
554 		 * and the previous master.
555 		 */
556 		assert(len > mstr->sm_strlen);
557 		stp->st_strsize += len - mstr->sm_strlen;
558 	}
559 
560 	if ((sthash = calloc(1, sizeof (*sthash))) == NULL)
561 		return (-1);
562 
563 	mstr->sm_hashval = sthash->hi_hashval = hashval;
564 	mstr->sm_strlen = sthash->hi_strlen = len;
565 	mstr->sm_str = str;
566 	sthash->hi_refcnt = 1;
567 	sthash->hi_mstr = mstr;
568 
569 	/*
570 	 * Insert string element into head of hash list
571 	 */
572 	hashval = hashval % bckcnt;
573 	sthash->hi_next = hashbcks[hashval];
574 	hashbcks[hashval] = sthash;
575 	return (0);
576 }
577 
578 /*
579  * Return amount of space required for the string table.
580  */
581 size_t
582 st_getstrtab_sz(Str_tbl *stp)
583 {
584 	assert(stp->st_fullstrsize > 0);
585 
586 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
587 		stp->st_flags |= FLG_STTAB_COOKED;
588 		return (stp->st_fullstrsize);
589 	}
590 
591 	if ((stp->st_flags & FLG_STTAB_COOKED) == 0) {
592 		LenNode		*lnp;
593 		void		*cookie;
594 
595 		stp->st_flags |= FLG_STTAB_COOKED;
596 		/*
597 		 * allocate a hash table about the size of # of
598 		 * strings input.
599 		 */
600 		stp->st_hbckcnt = findprime(stp->st_strcnt);
601 		if ((stp->st_hashbcks = calloc(stp->st_hbckcnt,
602 		    sizeof (*stp->st_hashbcks))) == NULL)
603 			return (0);
604 
605 		/*
606 		 * We now walk all of the strings in the list, from shortest to
607 		 * longest, and insert them into the hashtable.
608 		 */
609 		if ((lnp = avl_first(stp->st_lentree)) == NULL) {
610 			/*
611 			 * Is it possible we have an empty string table, if so,
612 			 * the table still contains '\0', so return the size.
613 			 */
614 			if (avl_numnodes(stp->st_lentree) == 0) {
615 				assert(stp->st_strsize == 1);
616 				return (stp->st_strsize);
617 			}
618 			return (0);
619 		}
620 
621 		while (lnp) {
622 			StrNode	*snp;
623 
624 			/*
625 			 * Walk the string lists and insert them into the hash
626 			 * list.  Once a string is inserted we no longer need
627 			 * it's entry, so the string can be freed.
628 			 */
629 			for (snp = avl_first(lnp->ln_strtree); snp;
630 			    snp = AVL_NEXT(lnp->ln_strtree, snp)) {
631 				if (st_hash_insert(stp, snp->sn_str,
632 				    lnp->ln_strlen) == -1)
633 					return (0);
634 			}
635 
636 			/*
637 			 * Now that the strings have been copied, walk the
638 			 * StrNode tree and free all the AVL nodes.  Note,
639 			 * avl_destroy_nodes() beats avl_remove() as the
640 			 * latter balances the nodes as they are removed.
641 			 * We just want to tear the whole thing down fast.
642 			 */
643 			cookie = NULL;
644 			while ((snp = avl_destroy_nodes(lnp->ln_strtree,
645 			    &cookie)) != NULL)
646 				free(snp);
647 			avl_destroy(lnp->ln_strtree);
648 			free(lnp->ln_strtree);
649 			lnp->ln_strtree = NULL;
650 
651 			/*
652 			 * Move on to the next LenNode.
653 			 */
654 			lnp = AVL_NEXT(stp->st_lentree, lnp);
655 		}
656 
657 		/*
658 		 * Now that all of the strings have been freed, walk the
659 		 * LenNode tree and free all of the AVL nodes.  Note,
660 		 * avl_destroy_nodes() beats avl_remove() as the latter
661 		 * balances the nodes as they are removed. We just want to
662 		 * tear the whole thing down fast.
663 		 */
664 		cookie = NULL;
665 		while ((lnp = avl_destroy_nodes(stp->st_lentree,
666 		    &cookie)) != NULL)
667 			free(lnp);
668 		avl_destroy(stp->st_lentree);
669 		free(stp->st_lentree);
670 		stp->st_lentree = 0;
671 	}
672 
673 	assert(stp->st_strsize > 0);
674 	assert(stp->st_fullstrsize >= stp->st_strsize);
675 
676 	return (stp->st_strsize);
677 }
678 
679 const char *
680 st_getstrbuf(Str_tbl *stp)
681 {
682 	return (stp->st_strbuf);
683 }
684 
685 
686 /*
687  * Associate a buffer with a string table.
688  */
689 int
690 st_setstrbuf(Str_tbl *stp, char *stbuf, size_t bufsize)
691 {
692 	assert(stp->st_flags & FLG_STTAB_COOKED);
693 
694 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
695 		if (bufsize < stp->st_fullstrsize)
696 			return (-1);
697 	} else {
698 		if (bufsize < stp->st_strsize)
699 			return (-1);
700 	}
701 
702 	stp->st_strbuf = stbuf;
703 #ifdef	DEBUG
704 	/*
705 	 * for debug builds - start with a stringtable filled in
706 	 * with '0xff'.  This makes it very easy to spot unfilled
707 	 * holes in the strtab.
708 	 */
709 	memset(stbuf, 0xff, bufsize);
710 	stbuf[0] = '\0';
711 #else
712 	memset(stbuf, 0x0, bufsize);
713 #endif
714 	return (0);
715 }
716 
717 /*
718  * Populate the buffer with all strings from stp.
719  * The table must be compressed and cooked
720  */
721 void
722 st_setallstrings(Str_tbl *stp)
723 {
724 	assert(stp->st_strbuf != NULL);
725 	assert((stp->st_flags & FLG_STTAB_COOKED));
726 	assert((stp->st_flags & FLG_STTAB_COMPRESS));
727 
728 	for (Str_master *str = stp->st_mstrlist; str != NULL;
729 	    str = str->sm_next) {
730 		int res __maybe_unused;
731 
732 		res = st_setstring(stp, str->sm_str, NULL);
733 		assert(res == 0);
734 	}
735 }
736 
737 /*
738  * Find str in the given table
739  * return it's offset, or -1
740  */
741 off_t
742 st_findstring(Str_tbl *stp, const char *needle)
743 {
744 	uint_t hashval;
745 	Str_hash *sthash;
746 	Str_master *mstr;
747 
748 	assert(stp->st_strbuf != NULL);
749 	assert((stp->st_flags & FLG_STTAB_COOKED));
750 
751 	/* The NUL string is always first */
752 	if (needle[0] == '\0')
753 		return (0);
754 
755 	/* In the uncompressed case we must linear search */
756 	if ((stp->st_flags & FLG_STTAB_COMPRESS) == 0) {
757 		const char *str, *end;
758 
759 		end = stp->st_strbuf + stp->st_fullstrsize;
760 
761 		for (str = stp->st_strbuf; str < end;
762 		    str += strlen(str) + 1) {
763 			if (strcmp(str, needle) == 0)
764 				return (str - stp->st_strbuf);
765 		}
766 
767 		return (-1);
768 	}
769 
770 	hashval = st_string_hash(needle);
771 
772 	for (sthash = stp->st_hashbcks[hashval % stp->st_hbckcnt];
773 	    sthash != NULL;
774 	    sthash = sthash->hi_next) {
775 		const char	*hstr;
776 
777 		if (sthash->hi_hashval != hashval)
778 			continue;
779 
780 		hstr = &sthash->hi_mstr->sm_str[sthash->hi_mstr->sm_strlen -
781 		    sthash->hi_strlen];
782 		if (strcmp(needle, hstr) == 0)
783 			break;
784 	}
785 
786 	/*
787 	 * Did we find the string?
788 	 */
789 	if (sthash == NULL)
790 		return (-1);
791 
792 	mstr = sthash->hi_mstr;
793 	assert(mstr->sm_stroff != 0);
794 
795 	/*
796 	 * Calculate offset of (sub)string.
797 	 */
798 	return (mstr->sm_stroff + mstr->sm_strlen - sthash->hi_strlen);
799 }
800