xref: /freebsd/contrib/tcsh/sh.hist.c (revision 3823d5e198425b4f5e5a80267d195769d1063773)
1 /* $Header: /p/tcsh/cvsroot/tcsh/sh.hist.c,v 3.53 2011/01/24 18:10:26 christos Exp $ */
2 /*
3  * sh.hist.c: Shell history expansions and substitutions
4  */
5 /*-
6  * Copyright (c) 1980, 1991 The Regents of the University of California.
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  */
33 #include "sh.h"
34 
35 RCSID("$tcsh: sh.hist.c,v 3.53 2011/01/24 18:10:26 christos Exp $")
36 
37 #include <assert.h>
38 #include "tc.h"
39 
40 extern int histvalid;
41 extern struct Strbuf histline;
42 Char HistLit = 0;
43 
44 static	int	heq	(const struct wordent *, const struct wordent *);
45 static	void	hfree	(struct Hist *);
46 
47 #define HIST_ONLY	0x01
48 #define HIST_SAVE	0x02
49 #define HIST_LOAD	0x04
50 #define HIST_REV	0x08
51 #define HIST_CLEAR	0x10
52 #define HIST_MERGE	0x20
53 #define HIST_TIME	0x40
54 
55 /*
56  * C shell
57  */
58 
59 /* Static functions don't show up in gprof summaries.  So eliminate "static"
60  * modifier from some frequently called functions. */
61 #ifdef PROF
62 #define PG_STATIC
63 #else
64 #define PG_STATIC static
65 #endif
66 
67 /* #define DEBUG_HIST 1 */
68 
69 static const int fastMergeErase = 1;
70 static unsigned histCount = 0;		/* number elements on history list */
71 static struct Hist *histTail = NULL;     /* last element on history list */
72 static struct Hist *histMerg = NULL;	 /* last element merged by Htime */
73 
74 static void insertHistHashTable(struct Hist *, unsigned);
75 
76 
77 /* Insert new element (hp) in history list after specified predecessor (pp). */
78 static void
79 hinsert(struct Hist *hp, struct Hist *pp)
80 {
81     struct Hist *fp = pp->Hnext;        /* following element, if any */
82     hp->Hnext = fp, hp->Hprev = pp;
83     pp->Hnext = hp;
84     if (fp)
85         fp->Hprev = hp;
86     else
87         histTail = hp;                  /* meaning hp->Hnext == NULL */
88     histCount++;
89 }
90 
91 /* Remove the entry from the history list. */
92 static void
93 hremove(struct Hist *hp)
94 {
95     struct Hist *pp = hp->Hprev;
96     assert(pp);                         /* elements always have a previous */
97     pp->Hnext = hp->Hnext;
98     if (hp->Hnext)
99         hp->Hnext->Hprev = pp;
100     else
101         histTail = pp;                  /* we must have been last */
102     if (hp == histMerg)			/* deleting this hint from list */
103 	histMerg = NULL;
104     assert(histCount > 0);
105     histCount--;
106 }
107 
108 /* Prune length of history list to specified size by history variable. */
109 PG_STATIC void
110 discardExcess(int histlen)
111 {
112     struct Hist *hp, *np;
113     if (histTail == NULL) {
114         assert(histCount == 0);
115         return;                         /* no entries on history list */
116     }
117     /* Prune dummy entries from the front, then old entries from the back. If
118      * the list is still too long scan the whole list as before.  But only do a
119      * full scan if the list is more than 6% (1/16th) too long. */
120     while (histCount > (unsigned)histlen && (np = Histlist.Hnext)) {
121         if (eventno - np->Href >= histlen || histlen == 0)
122             hremove(np), hfree(np);
123         else
124             break;
125     }
126     while (histCount > (unsigned)histlen && (np = histTail) != &Histlist) {
127         if (eventno - np->Href >= histlen || histlen == 0)
128             hremove(np), hfree(np);
129         else
130             break;
131     }
132     if (histCount - (histlen >> 4) <= (unsigned)histlen)
133 	return;				/* don't bother doing the full scan */
134     for (hp = &Histlist; histCount > (unsigned)histlen &&
135 	(np = hp->Hnext) != NULL;)
136         if (eventno - np->Href >= histlen || histlen == 0)
137             hremove(np), hfree(np);
138         else
139             hp = np;
140 }
141 
142 /* Add the command "sp" to the history list. */
143 void
144 savehist(
145   struct wordent *sp,
146   int mflg)				/* true if -m (merge) specified */
147 {
148     int histlen = 0;
149     Char   *cp;
150 
151     /* throw away null lines */
152     if (sp && sp->next->word[0] == '\n')
153 	return;
154     cp = varval(STRhistory);
155     while (*cp) {
156 	if (!Isdigit(*cp)) {
157 	    histlen = 0;
158 	    break;
159 	}
160 	histlen = histlen * 10 + *cp++ - '0';
161     }
162     if (sp)
163         (void) enthist(++eventno, sp, 1, mflg, histlen);
164     discardExcess(histlen);
165 }
166 
167 #define USE_JENKINS_HASH 1
168 /* #define USE_ONE_AT_A_TIME 1 */
169 #undef PRIME_LENGTH			/* no need for good HTL */
170 
171 #ifdef USE_JENKINS_HASH
172 #define hashFcnName "lookup3"
173 /* From:
174    lookup3.c, by Bob Jenkins, May 2006, Public Domain.
175    "...  You can use this free for any purpose.  It's in
176     the public domain.  It has no warranty."
177    http://burtleburtle.net/bob/hash/index.html
178  */
179 
180 #define rot(x,k) (((x)<<(k)) | ((x)>>(32-(k))))
181 #define mix(a,b,c) \
182 { \
183   a -= c;  a ^= rot(c, 4);  c += b; \
184   b -= a;  b ^= rot(a, 6);  a += c; \
185   c -= b;  c ^= rot(b, 8);  b += a; \
186   a -= c;  a ^= rot(c,16);  c += b; \
187   b -= a;  b ^= rot(a,19);  a += c; \
188   c -= b;  c ^= rot(b, 4);  b += a; \
189 }
190 #define final(a,b,c) \
191 { \
192   c ^= b; c -= rot(b,14); \
193   a ^= c; a -= rot(c,11); \
194   b ^= a; b -= rot(a,25); \
195   c ^= b; c -= rot(b,16); \
196   a ^= c; a -= rot(c, 4); \
197   b ^= a; b -= rot(a,14); \
198   c ^= b; c -= rot(b,24); \
199 }
200 
201 struct hashValue		  /* State used to hash a wordend word list. */
202 {
203     uint32_t a, b, c;
204 };
205 
206 /* Set up the internal state */
207 static void
208 initializeHash(struct hashValue *h)
209 {
210     h->a = h->b = h->c = 0xdeadbeef;
211 }
212 
213 /* This does a partial hash of the Chars in a single word.  For efficiency we
214  * include 3 versions of the code to pack Chars into 32-bit words for the
215  * mixing function. */
216 static void
217 addWordToHash(struct hashValue *h, const Char *word)
218 {
219     uint32_t a = h->a, b = h->b, c = h->c;
220 #ifdef SHORT_STRINGS
221 #ifdef WIDE_STRINGS
222     assert(sizeof(Char) >= 4);
223     while (1) {
224 	unsigned k;
225 	if ((k = (uChar)*word++) == 0) break; a += k;
226 	if ((k = (uChar)*word++) == 0) break; b += k;
227 	if ((k = (uChar)*word++) == 0) break; c += k;
228 	mix(a, b, c);
229     }
230 #else
231     assert(sizeof(Char) == 2);
232     while (1) {
233 	unsigned k;
234 	if ((k = (uChar)*word++) == 0) break; a += k;
235 	if ((k = (uChar)*word++) == 0) break; a += k << 16;
236 	if ((k = (uChar)*word++) == 0) break; b += k;
237 	if ((k = (uChar)*word++) == 0) break; b += k << 16;
238 	if ((k = (uChar)*word++) == 0) break; c += k;
239 	if ((k = (uChar)*word++) == 0) break; c += k << 16;
240 	mix(a, b, c);
241     }
242 #endif
243 #else
244     assert(sizeof(Char) == 1);
245     while (1) {
246 	unsigned k;
247 	if ((k = *word++) == 0) break; a += k;
248 	if ((k = *word++) == 0) break; a += k << 8;
249 	if ((k = *word++) == 0) break; a += k << 16;
250 	if ((k = *word++) == 0) break; a += k << 24;
251 	if ((k = *word++) == 0) break; b += k;
252 	if ((k = *word++) == 0) break; b += k << 8;
253 	if ((k = *word++) == 0) break; b += k << 16;
254 	if ((k = *word++) == 0) break; b += k << 24;
255 	if ((k = *word++) == 0) break; c += k;
256 	if ((k = *word++) == 0) break; c += k << 8;
257 	if ((k = *word++) == 0) break; c += k << 16;
258 	if ((k = *word++) == 0) break; c += k << 24;
259 	mix(a, b, c);
260     }
261 #endif
262     h->a = a, h->b = b, h->c = c;
263 }
264 
265 static void
266 addCharToHash(struct hashValue *h, Char ch)
267 {
268     /* The compiler (gcc -O2) seems to do a good job optimizing this without
269      * explicitly extracting into local variables. */
270     h->a += (uChar)ch;
271     mix(h->a, h->b, h->c);
272 }
273 
274 static uint32_t
275 finalizeHash(struct hashValue *h)
276 {
277     uint32_t a = h->a, b = h->b, c = h->c;
278     final(a, b, c);
279     return c;
280 }
281 
282 #elif USE_ONE_AT_A_TIME
283 #define hashFcnName "one-at-a-time"
284 /* This one is also from Bob Jenkins, but is slower but simpler than lookup3.
285    "...  The code given here are all public domain."
286    http://burtleburtle.net/bob/hash/doobs.html */
287 
288 #if 0
289 ub4
290 one_at_a_time(char *key, ub4 len)
291 {
292   ub4   hash, i;
293   for (hash=0, i=0; i<len; ++i)
294   {
295     hash += key[i];
296     hash += (hash << 10);
297     hash ^= (hash >> 6);
298   }
299   hash += (hash << 3);
300   hash ^= (hash >> 11);
301   hash += (hash << 15);
302   return (hash & mask);
303 }
304 #endif
305 
306 struct hashValue { uint32_t h; };
307 static void
308 initializeHash(struct hashValue *h)
309 {
310     h->h = 0;
311 }
312 
313 static void
314 addWordToHash(struct hashValue *h, const Char *word)
315 {
316     unsigned k;
317     uint32_t hash = h->h;
318     while (k = (uChar)*word++)
319 	hash += k, hash += hash << 10, hash ^= hash >> 6;
320     h->h = hash;
321 }
322 
323 static void
324 addCharToHash(struct hashValue *h, Char c)
325 {
326     Char b[2] = { c, 0 };
327     addWordToHash(h, b);
328 }
329 
330 static uint32_t
331 finalizeHash(struct hashValue *h)
332 {
333     unsigned hash = h->h;
334     hash += (hash << 3);
335     hash ^= (hash >> 11);
336     hash += (hash << 15);
337     return hash;
338 }
339 
340 #else
341 #define hashFcnName "add-mul"
342 /* Simple multipy and add hash. */
343 #define PRIME_LENGTH 1			/* need "good" HTL */
344 struct hashValue { uint32_t h; };
345 static void
346 initializeHash(struct hashValue *h)
347 {
348     h->h = 0xe13e2345;
349 }
350 
351 static void
352 addWordToHash(struct hashValue *h, const Char *word)
353 {
354     unsigned k;
355     uint32_t hash = h->h;
356     while (k = (uChar)*word++)
357 	hash = hash * 0x9e4167b9 + k;
358     h->h = hash;
359 }
360 
361 static void
362 addCharToHash(struct hashValue *h, Char c)
363 {
364     h->h = h->h * 0x9e4167b9 + (uChar)c;
365 }
366 
367 static uint32_t
368 finalizeHash(struct hashValue *h)
369 {
370     return h->h;
371 }
372 #endif
373 
374 static unsigned
375 hashhist(struct wordent *h0)
376 {
377     struct hashValue s;
378     struct wordent *firstWord = h0->next;
379     struct wordent *h = firstWord;
380     unsigned hash = 0;
381 
382     initializeHash(&s);
383     for (; h != h0; h = h->next) {
384         if (h->word[0] == '\n')
385             break;                      /* don't hash newline */
386         if (h != firstWord)
387             addCharToHash(&s, ' ');	/* space between words */
388 	addWordToHash(&s, h->word);
389     }
390     hash = finalizeHash(&s);
391     /* Zero means no hash value, so never return zero as a hash value. */
392     return hash ? hash : 0x7fffffff;	/* prime! */
393 }
394 
395 #if 0
396 unsigned
397 hashStr(Char *str)
398 {
399     struct hashValue s;
400     initializeHash(&s);
401     addWordToHash(&s, str);
402     return finalizeHash(&s);
403 }
404 #endif
405 
406 #ifdef PRIME_LENGTH			/* need good HTL */
407 #define hash2tableIndex(hash, len) ((hash) % len)
408 #else
409 #define hash2tableIndex(hash, len) ((hash) & (len-1))
410 #endif
411 
412 /* This code can be enabled to test the above hash functions for speed and
413  * collision avoidance.  The testing is enabled by "occasional" calls to
414  * displayHistStats(), see which. */
415 #ifdef DEBUG_HIST
416 
417 #ifdef BSDTIMES
418 static double
419 doTiming(int start) {
420     static struct timeval beginTime;
421     if (start) {
422 	gettimeofday(&beginTime, NULL);
423 	return 0.0;
424     } else {
425 	struct timeval now;
426 	gettimeofday(&now, NULL);
427 	return (now.tv_sec-beginTime.tv_sec) +
428 	    (now.tv_usec-beginTime.tv_usec)/1e6;
429     }
430 }
431 #else
432 static double
433 doTiming(int start) {
434     USE(start);
435     return 0.0;
436 }
437 #endif
438 
439 static void
440 generateHashes(int nChars, unsigned nWords, unsigned samples, unsigned *hashes,
441     unsigned length)
442 {
443     if (nChars < 1)
444 	return;
445     nWords = (nWords < 1) ? 1 : (nWords > 4) ? 4 : nWords;
446     Char *number = xmalloc((nChars+nWords)*sizeof(Char));
447     struct wordent word[4];
448     struct wordent base = { NULL, &word[0], &word[0] };
449     word[0].word = number, word[0].next = &base, word[0].prev = &base;
450     unsigned w = 0;			/* word number */
451     /* Generate multiple words of length 2, 3, 5, then all the rest. */
452     unsigned wBoundaries[4] = { 2-1, 2+3-1, 2+3+5-1, 0 };
453     /* Ensure the last word has at least 4 Chars in it. */
454     while (nWords >= 2 && nChars < (wBoundaries[nWords-2]+1) + 4)
455 	nWords--;
456     wBoundaries[nWords-1] = 0xffffffff;	/* don't end word past this point */
457     unsigned i;
458     for (i = 0; i<nChars; i++) {
459 	/* In deference to the gawd awful add-mul hash, we won't use the worse
460 	 * case here (setting all Chars to 1), but assume mostly (or at least
461 	 * initially) ASCII data. */
462 	number[i+w] = '!';		/* 0x21 = 33 */
463 
464 	if (i == wBoundaries[w]) {	/* end a word here and move to next */
465 	    w++;			/* next word */
466 	    number[i+w] = 0;		/* terminate */
467 	    word[w].word = &number[i+w+1];
468 	    word[w].next = &base, word[w].prev = &word[w-1];
469 	    word[w-1].next = &word[w], base.prev = &word[w];
470 	}
471     }
472     /* w is the index of the last word actually created. */
473     number[nChars + w] = 0;		/* terminate last word */
474     unsigned timeLimit = !samples;
475     if (samples == 0)
476 	samples = 1000000000;
477     doTiming(1);
478     double sec;
479     for (i = 0; i < samples; i++) {
480 	/* increment 4 digit base 255 number; last characters vary fastest */
481 	unsigned j = nChars-1 + w;
482 	while (1) {
483 	    if (++number[j] != 0)
484 		break;
485 	    /* else reset this digit and proceed to next one */
486 	    number[j] = 1;
487 	    if (&number[j] <= word[w].word)
488 		break;			/* stop at beginning of last word */
489 	    j--;
490 	}
491 	if (word[w].word[0] == '\n')
492 	    word[w].word[0]++;		/* suppress newline character */
493 	unsigned hash = hashhist(&base);
494 	hashes[hash2tableIndex(hash, length)]++;
495 	if (timeLimit && (i & 0x3ffff) == 0x3ffff) {
496 	    sec = doTiming(0);
497 	    if (sec > 10)
498 		break;
499 	}
500     }
501     if (i >= samples)
502 	sec = doTiming(0);
503     else
504 	samples = i;			/* number we actually did */
505     if (sec > 0.01) {
506 	xprintf("Hash %d (%d Char %u words) with %s: %d nsec/hash, %d mcps\n",
507 		samples, nChars, w+1, hashFcnName, (int)((sec/samples)*1e9),
508 		(int)((double)samples*nChars/sec/1e6));
509     }
510 }
511 #endif /* DEBUG_HIST */
512 
513 #ifdef DEBUG_HIST
514 static void
515 testHash(void)
516 {
517     static const Char STRtestHashTimings[] =
518 	{ 't','e','s','t','H','a','s','h','T','i','m','i','n','g','s', 0 };
519     struct varent *vp = adrof(STRtestHashTimings);
520     if (vp && vp->vec) {
521 	unsigned hashes[4];		/* dummy place to put hashes */
522 	Char **vals = vp->vec;
523 	while (*vals) {
524 	    int length = getn(*vals);
525 	    unsigned words =
526 		(length < 5) ? 1 : (length < 25) ? 2 : (length < 75) ? 3 : 4;
527 	    if (length > 0)
528 		generateHashes(length, words, 0, hashes, 4);
529 	    vals++;
530 	}
531     }
532     unsigned length = 1024;
533 #ifdef PRIME_LENGTH			/* need good HTL */
534     length = 1021;
535 #endif
536     unsigned *hashes = xmalloc(length*sizeof(unsigned));
537     memset(hashes, 0, length*sizeof(unsigned));
538     /* Compute collision statistics for half full hashes modulo "length". */
539     generateHashes(4, 1, length/2, hashes, length);
540     /* Evaluate collisions by comparing occupancy rates (mean value 0.5).
541      * One bin for each number of hits. */
542     unsigned bins[155];
543     memset(bins, 0, sizeof(bins));
544     unsigned highest = 0;
545     unsigned i;
546     for (i = 0; i<length; i++) {
547 	unsigned hits = hashes[i];
548 	if (hits >= sizeof(bins)/sizeof(bins[0])) /* clip */
549 	    hits = highest = sizeof(bins)/sizeof(bins[0]) - 1;
550 	if (hits > highest)
551 	    highest = hits;
552 	bins[hits]++;
553     }
554     xprintf("Occupancy of %d buckets by %d hashes %d Chars %d word with %s\n",
555 	    length, length/2, 4, 1, hashFcnName);
556     for (i = 0; i <= highest; i++) {
557 	xprintf(" %d buckets (%d%%) with %d hits\n",
558 		bins[i], bins[i]*100/length, i);
559     }
560     /* Count run lengths to evaluate linear rehashing effectiveness.  Estimate
561      * a little corrupted by edge effects. */
562     memset(bins, 0, sizeof(bins));
563     highest = 0;
564     for (i = 0; hashes[i] == 0; i++);	/* find first occupied bucket */
565     unsigned run = 0;
566     unsigned rehashed = 0;
567     for (; i<length; i++) {
568 	unsigned hits = hashes[i];
569 	if (hits == 0 && rehashed > 0)
570 	    hits = 1 && rehashed--;
571 	else if (hits > 1)
572 	    rehashed += hits-1;
573 	if (hits)
574 	    run++;
575 	else {
576 	    /* a real free slot, count it */
577 	    if (run >= sizeof(bins)/sizeof(bins[0])) /* clip */
578 		run = highest = sizeof(bins)/sizeof(bins[0]) - 1;
579 	    if (run > highest)
580 		highest = run;
581 	    bins[run]++;
582 	    run = 0;
583 	}
584     }
585     /* Ignore the partial run at end as we ignored the beginning. */
586     double merit = 0.0, entries = 0;
587     for (i = 0; i <= highest; i++) {
588 	entries += bins[i]*i;		/* total hashed objects */
589 	merit += bins[i]*i*i;
590     }
591     xprintf("Rehash collision figure of merit %u (ideal=100), run lengths:\n",
592 	    (int)(100.0*merit/entries));
593     for (i = 0; i <= highest; i++) {
594 	if (bins[i] != 0)
595 	    xprintf(" %d runs of length %d buckets\n", bins[i], i);
596     }
597     xfree(hashes);
598 }
599 #endif /* DEBUG_HIST */
600 
601 /* Compares two word lists for equality. */
602 static int
603 heq(const struct wordent *a0, const struct wordent *b0)
604 {
605     const struct wordent *a = a0->next, *b = b0->next;
606 
607     for (;;) {
608 	if (Strcmp(a->word, b->word) != 0)
609 	    return 0;
610 	a = a->next;
611 	b = b->next;
612 	if (a == a0)
613 	    return (b == b0) ? 1 : 0;
614 	if (b == b0)
615 	    return 0;
616     }
617 }
618 
619 /* Renumber entries following p, which we will be deleting. */
620 PG_STATIC void
621 renumberHist(struct Hist *p)
622 {
623     int n = p->Href;
624     while ((p = p->Hnext))
625         p->Href = n--;
626 }
627 
628 /* The hash table is implemented as an array of pointers to Hist entries.  Each
629  * entry is located in the table using hash2tableIndex() and checking the
630  * following entries in case of a collision (linear rehash).  Free entries in
631  * the table are zero (0, NULL, emptyHTE).  Deleted entries that cannot yet be
632  * freed are set to one (deletedHTE).  The Hist.Hhash member is non-zero iff
633  * the entry is in the hash table.  When the hash table get too full, it is
634  * reallocated to be approximately twice the history length (see
635  * getHashTableSize). */
636 static struct Hist **histHashTable = NULL;
637 static unsigned histHashTableLength = 0; /* number of Hist pointers in table */
638 
639 static struct Hist * const emptyHTE = NULL;
640 static struct Hist * const deletedHTE = (struct Hist *)1;
641 
642 static struct {
643     unsigned insertCount;
644     unsigned removeCount;
645     unsigned rehashes;
646     int deleted;
647 } hashStats;
648 
649 #ifdef DEBUG_HIST
650 void
651 checkHistHashTable(int print)
652 {
653     unsigned occupied = 0;
654     unsigned deleted = 0;
655     unsigned i;
656     for (i = 0; i<histHashTableLength; i++)
657 	if (histHashTable[i] == emptyHTE)
658 	    continue;
659 	else if (histHashTable[i] == deletedHTE)
660 	    deleted++;
661 	else
662 	    occupied++;
663     if (print)
664 	xprintf("  found len %u occupied %u deleted %u\n",
665 		histHashTableLength, occupied, deleted);
666     assert(deleted == hashStats.deleted);
667 }
668 
669 static int doneTest = 0;
670 
671 /* Main entry point for displaying history statistics and hash function
672  * behavior. */
673 void
674 displayHistStats(const char *reason)
675 {
676     /* Just hash statistics for now. */
677     xprintf("%s history hash table len %u count %u (deleted %d)\n", reason,
678 	    histHashTableLength, histCount, hashStats.deleted);
679     xprintf("  inserts %u rehashes %u%% each\n",
680 	    hashStats.insertCount,
681 	    (hashStats.insertCount
682 	     ? 100*hashStats.rehashes/hashStats.insertCount : 0));
683     xprintf("  removes %u net %u\n",
684 	    hashStats.removeCount,
685 	    hashStats.insertCount - hashStats.removeCount);
686     assert(hashStats.insertCount >= hashStats.removeCount);
687     checkHistHashTable(1);
688     memset(&hashStats, 0, sizeof(hashStats));
689     if (!doneTest) {
690 	testHash();
691 	doneTest = 1;
692     }
693 }
694 #else
695 void
696 displayHistStats(const char *reason)
697 {
698     USE(reason);
699 }
700 #endif
701 
702 static void
703 discardHistHashTable(void)
704 {
705     if (histHashTable == NULL)
706         return;
707     displayHistStats("Discarding");
708     xfree(histHashTable);
709     histHashTable = NULL;
710 }
711 
712 /* Computes a new hash table size, when the current one is too small. */
713 static unsigned
714 getHashTableSize(int histlen)
715 {
716     unsigned target = histlen * 2;
717     unsigned e = 5;
718     unsigned size;
719     while ((size = 1<<e) < target)
720 	e++;
721 #ifdef PRIME_LENGTH			/* need good HTL */
722     /* Not all prime, but most are and none have factors smaller than 11. */
723     return size+15;
724 #else
725     assert((size & (size-1)) == 0);	/* must be a power of two */
726     return size;
727 #endif
728 }
729 
730 /* Create the hash table or resize, if necessary. */
731 static void
732 createHistHashTable(int histlen)
733 {
734     if (histlen == 0) {
735 	discardHistHashTable();
736         return;
737     }
738     if (histlen < 0) {
739         histlen = getn(varval(STRhistory));
740 	if (histlen == 0)
741 	    return;			/* no need for hash table */
742 	assert(histlen > 0);
743     }
744     if (histHashTable != NULL) {
745 	if (histCount < histHashTableLength * 3 / 4)
746 	    return;			/* good enough for now */
747 	discardHistHashTable();		/* too small */
748     }
749     histHashTableLength = getHashTableSize(
750 	histlen > (int)histCount ? histlen : (int)histCount);
751     histHashTable = xmalloc(histHashTableLength * sizeof(struct Hist *));
752     memset(histHashTable, 0, histHashTableLength * sizeof(struct Hist *));
753     assert(histHashTable[0] == emptyHTE);
754 
755     /* Now insert all the entries on the history list into the hash table. */
756     {
757         struct Hist *hp;
758         for (hp = &Histlist; (hp = hp->Hnext) != NULL;) {
759             unsigned lpHash = hashhist(&hp->Hlex);
760             assert(!hp->Hhash || hp->Hhash == lpHash);
761             hp->Hhash = 0;              /* force insert to new hash table */
762             insertHistHashTable(hp, lpHash);
763         }
764     }
765 }
766 
767 /* Insert np into the hash table.  We assume that np is already on the
768  * Histlist.  The specified hashval matches the new Hist entry but has not yet
769  * been assigned to Hhash (or the element is already on the hash table). */
770 static void
771 insertHistHashTable(struct Hist *np, unsigned hashval)
772 {
773     unsigned rehashes = 0;
774     unsigned hi = 0;
775     if (!histHashTable)
776 	return;
777     if (np->Hhash != 0) {
778         /* already in hash table */
779         assert(hashval == np->Hhash);
780         return;
781     }
782     assert(np != deletedHTE);
783     /* Find a free (empty or deleted) slot, using linear rehash. */
784     assert(histHashTable);
785     for (rehashes = 0;
786          ((hi = hash2tableIndex(hashval + rehashes, histHashTableLength)),
787           histHashTable[hi] != emptyHTE && histHashTable[hi] != deletedHTE);
788          rehashes++) {
789         assert(np != histHashTable[hi]);
790         if (rehashes >= histHashTableLength / 10) {
791             /* Hash table is full, so grow it.  We assume the create function
792              * will roughly double the size we give it.  Create initializes the
793              * new table with everything on the Histlist, so we are done when
794              * it returns.  */
795 #ifdef DEBUG_HIST
796 	    xprintf("Growing history hash table from %d ...",
797 		histHashTableLength);
798 	    flush();
799 #endif
800             discardHistHashTable();
801             createHistHashTable(histHashTableLength);
802 #ifdef DEBUG_HIST
803 	    xprintf("to %d.\n", histHashTableLength);
804 #endif
805             return;
806         }
807     }
808     /* Might be sensible to grow hash table if rehashes is "too big" here. */
809     if (histHashTable[hi] == deletedHTE)
810 	hashStats.deleted--;
811     histHashTable[hi] = np;
812     np->Hhash = hashval;
813     hashStats.insertCount++;
814     hashStats.rehashes += rehashes;
815 }
816 
817 /* Remove the 'np' entry from the hash table. */
818 static void
819 removeHistHashTable(struct Hist *np)
820 {
821     unsigned hi = np->Hhash;
822     if (!histHashTable || !hi)
823         return;                         /* no hash table or not on it */
824     /* find desired entry */
825     while ((hi = hash2tableIndex(hi, histHashTableLength)),
826            histHashTable[hi] != emptyHTE) {
827         if (np == histHashTable[hi]) {
828 	    unsigned i;
829 	    unsigned deletes = 0;
830 	    histHashTable[hi] = deletedHTE; /* dummy, but non-zero entry */
831 	    /* now peek ahead to see if the dummies are really necessary. */
832 	    i = 1;
833 	    while (histHashTable[hash2tableIndex(hi+i, histHashTableLength)] ==
834 		   deletedHTE)
835 		i++;
836 	    if (histHashTable[hash2tableIndex(hi+i, histHashTableLength)] ==
837 		emptyHTE) {
838 		/* dummies are no longer necessary placeholders. */
839 		deletes = i;
840 		while (i-- > 0) {
841 		    histHashTable[hash2tableIndex(hi+i, histHashTableLength)] =
842 			emptyHTE;
843 		}
844 	    }
845 	    hashStats.deleted += 1 - deletes; /* delta deleted entries */
846 	    hashStats.removeCount++;
847             return;
848         }
849         hi++;                           /* linear rehash */
850     }
851     assert(!"Hist entry not found in hash table");
852 }
853 
854 /* Search the history hash table for a command matching lp, using hashval as
855  * its hash value. */
856 static struct Hist *
857 findHistHashTable(struct wordent *lp, unsigned hashval)
858 {
859     unsigned deleted = 0;		/* number of deleted entries skipped */
860     unsigned hi = hashval;
861     struct Hist *hp;
862     if (!histHashTable)
863 	return NULL;
864     while ((hi = hash2tableIndex(hi, histHashTableLength)),
865            (hp = histHashTable[hi]) != emptyHTE) {
866         if (hp == deletedHTE)
867 	    deleted++;
868 	else if (hp->Hhash == hashval && heq(lp, &(hp->Hlex)))
869             return hp;
870 	if (deleted > (histHashTableLength>>4)) {
871 	    /* lots of deletes, so we need a sparser table. */
872             discardHistHashTable();
873             createHistHashTable(histHashTableLength);
874 	    return findHistHashTable(lp, hashval);
875 	}
876         hi++;                           /* linear rehash */
877     }
878     return NULL;
879 }
880 
881 /* When merge semantics are in use, find the approximate predecessor for the
882  * new entry, so that the Htime entries are decreasing.  Return the entry just
883  * before the first entry with equal times, so the caller can check for
884  * duplicates.  When pTime is not NULL, use it as a starting point for search,
885  * otherwise search from beginning (largest time value) of history list. */
886 PG_STATIC struct Hist *
887 mergeInsertionPoint(
888     struct Hist *np,                      /* new entry to be inserted */
889     struct Hist *pTime)                   /* hint about where to insert */
890 {
891     struct Hist *pp, *p;
892     if (histTail && histTail->Htime >= np->Htime)
893 	pTime = histTail;		/* new entry goes at the end */
894     if (histMerg && histMerg != &Histlist && histMerg != Histlist.Hnext) {
895 	/* Check above and below previous insertion point, in case we're adding
896 	 * sequential times in the middle of the list (e.g. history -M). */
897 	if (histMerg->Htime >= np->Htime)
898 	    pTime = histMerg;
899 	else if (histMerg->Hprev->Htime >= np->Htime)
900 	    pTime = histMerg->Hprev;
901     }
902     if (pTime) {
903         /* With hint, search up the list until Htime is greater.  We skip past
904          * the equal ones, too, so our caller can elide duplicates. */
905         pp = pTime;
906         while (pp != &Histlist && pp->Htime <= np->Htime)
907             pp = pp->Hprev;
908     } else
909         pp = &Histlist;
910     /* Search down the list while current entry's time is too large. */
911     while ((p = pp->Hnext) && (p->Htime > np->Htime))
912             pp = p;                     /* advance insertion point */
913     /* Remember recent position as hint for next time */
914     histMerg = pp;
915     return pp;
916 }
917 
918 /* Bubble Hnum & Href in new entry down to pp through earlier part of list. */
919 PG_STATIC void bubbleHnumHrefDown(struct Hist *np, struct Hist *pp)
920 {
921     struct Hist *p;
922     for (p = Histlist.Hnext; p != pp->Hnext; p = p->Hnext) {
923         /* swap Hnum & Href values of p and np. */
924         int n = p->Hnum, r = p->Href;
925         p->Hnum = np->Hnum; p->Href = np->Href;
926         np->Hnum = n; np->Href = r;
927     }
928 }
929 
930 /* Enter new command into the history list according to current settings. */
931 struct Hist *
932 enthist(
933   int event,				/* newly incremented global eventno */
934   struct wordent *lp,
935   int docopy,
936   int mflg,				/* true if merge requested */
937   int histlen)				/* -1 if unknown */
938 {
939     struct Hist *p = NULL, *pp = &Histlist, *pTime = NULL;
940     struct Hist *np;
941     const Char *dp;
942     unsigned lpHash = 0;                /* non-zero if hashing entries */
943 
944     if ((dp = varval(STRhistdup)) != STRNULL) {
945 	if (eq(dp, STRerase)) {
946 	    /* masaoki@akebono.tky.hp.com (Kobayashi Masaoki) */
947             createHistHashTable(histlen);
948             lpHash = hashhist(lp);
949             assert(lpHash != 0);
950             p = findHistHashTable(lp, lpHash);
951             if (p) {
952 		if (Htime != 0 && p->Htime > Htime)
953 		    Htime = p->Htime;
954                 /* If we are merging, and the old entry is at the place we want
955                  * to insert the new entry, then remember the place. */
956                 if (mflg && Htime != 0 && p->Hprev->Htime >= Htime)
957                     pTime = p->Hprev;
958 		if (!fastMergeErase)
959 		    renumberHist(p);	/* Reset Href of subsequent entries */
960                 hremove(p);
961 		hfree(p);
962                 p = NULL;               /* so new entry is allocated below */
963 	    }
964 	}
965 	else if (eq(dp, STRall)) {
966             createHistHashTable(histlen);
967             lpHash = hashhist(lp);
968             assert(lpHash != 0);
969             p = findHistHashTable(lp, lpHash);
970 	    if (p)   /* p!=NULL, only update this entry's Htime below */
971 		eventno--;		/* not adding a new event */
972 	}
973 	else if (eq(dp, STRprev)) {
974 	    if (pp->Hnext && heq(lp, &(pp->Hnext->Hlex))) {
975 		p = pp->Hnext;
976 		eventno--;
977 	    }
978 	}
979     }
980 
981     np = p ? p : xmalloc(sizeof(*np));
982 
983     /* Pick up timestamp set by lex() in Htime if reading saved history */
984     if (Htime != 0) {
985 	np->Htime = Htime;
986 	Htime = 0;
987     }
988     else
989         (void) time(&(np->Htime));
990 
991     if (p == np)
992         return np;                      /* reused existing entry */
993 
994     /* Initialize the new entry. */
995     np->Hnum = np->Href = event;
996     if (docopy) {
997         copylex(&np->Hlex, lp);
998 	if (histvalid)
999 	    np->histline = Strsave(histline.s);
1000 	else
1001 	    np->histline = NULL;
1002     }
1003     else {
1004 	np->Hlex.next = lp->next;
1005 	lp->next->prev = &np->Hlex;
1006 	np->Hlex.prev = lp->prev;
1007         lp->prev->next = &np->Hlex;
1008         np->histline = NULL;
1009     }
1010     np->Hhash = 0;
1011 
1012     /* The head of history list is the default insertion point.
1013        If merging, advance insertion point, in pp, according to Htime. */
1014     /* XXX -- In histdup=all, Htime values can be non-monotonic. */
1015     if (mflg) {                         /* merge according to np->Htime */
1016         pp = mergeInsertionPoint(np, pTime);
1017         for (p = pp->Hnext; p && p->Htime == np->Htime; pp = p, p = p->Hnext) {
1018             if (heq(&p->Hlex, &np->Hlex)) {
1019                 eventno--;              /* duplicate, so don't add new event */
1020                 hfree(np);
1021                 return (p);
1022               }
1023           }
1024         /* pp is now the last entry with time >= to np. */
1025 	if (!fastMergeErase) {		/* renumber at end of loadhist */
1026 	    /* Before inserting np after pp, bubble its Hnum & Href values down
1027 	     * through the earlier part of list. */
1028 	    bubbleHnumHrefDown(np, pp);
1029 	}
1030     }
1031     else
1032         pp = &Histlist;                 /* insert at beginning of history */
1033     hinsert(np, pp);
1034     if (lpHash && histlen != 0)		/* erase & all modes use hash table */
1035         insertHistHashTable(np, lpHash);
1036     else
1037         discardHistHashTable();
1038     return (np);
1039 }
1040 
1041 static void
1042 hfree(struct Hist *hp)
1043 {
1044     assert(hp != histMerg);
1045     if (hp->Hhash)
1046         removeHistHashTable(hp);
1047     freelex(&hp->Hlex);
1048     if (hp->histline)
1049         xfree(hp->histline);
1050     xfree(hp);
1051 }
1052 
1053 PG_STATIC void
1054 phist(struct Hist *hp, int hflg)
1055 {
1056     if (hflg & HIST_ONLY) {
1057 	int old_output_raw;
1058 
1059        /*
1060         * Control characters have to be written as is (output_raw).
1061         * This way one can preserve special characters (like tab) in
1062         * the history file.
1063         * From: mveksler@vnet.ibm.com (Veksler Michael)
1064         */
1065 	old_output_raw = output_raw;
1066         output_raw = 1;
1067 	cleanup_push(&old_output_raw, output_raw_restore);
1068 	if (hflg & HIST_TIME)
1069 	    /*
1070 	     * Make file entry with history time in format:
1071 	     * "+NNNNNNNNNN" (10 digits, left padded with ascii '0')
1072 	     */
1073 
1074 	    xprintf("#+%010lu\n", (unsigned long)hp->Htime);
1075 
1076 	if (HistLit && hp->histline)
1077 	    xprintf("%S\n", hp->histline);
1078 	else
1079 	    prlex(&hp->Hlex);
1080         cleanup_until(&old_output_raw);
1081     }
1082     else {
1083 	Char   *cp = str2short("%h\t%T\t%R\n");
1084 	Char *p;
1085 	struct varent *vp = adrof(STRhistory);
1086 
1087 	if (vp && vp->vec != NULL && vp->vec[0] && vp->vec[1])
1088 	    cp = vp->vec[1];
1089 
1090 	p = tprintf(FMT_HISTORY, cp, NULL, hp->Htime, hp);
1091 	cleanup_push(p, xfree);
1092 	for (cp = p; *cp;)
1093 	    xputwchar(*cp++);
1094 	cleanup_until(p);
1095     }
1096 }
1097 
1098 PG_STATIC void
1099 dophist(int n, int hflg)
1100 {
1101     struct Hist *hp;
1102     if (setintr) {
1103 	int old_pintr_disabled;
1104 
1105 	pintr_push_enable(&old_pintr_disabled);
1106 	cleanup_until(&old_pintr_disabled);
1107     }
1108     if ((hflg & HIST_REV) == 0) {
1109 	/* Since the history list is stored most recent first, non-reversing
1110 	 * print needs to print (backwards) up the list. */
1111 	if ((unsigned)n >= histCount)
1112 	    hp = histTail;
1113 	else {
1114 	    for (hp = Histlist.Hnext;
1115 		 --n > 0 && hp->Hnext != NULL;
1116 		 hp = hp->Hnext)
1117 		;
1118 	}
1119 	if (hp == NULL)
1120 	    return;			/* nothing to print */
1121 	for (; hp != &Histlist; hp = hp->Hprev)
1122 	    phist(hp, hflg);
1123     } else {
1124 	for (hp = Histlist.Hnext; n-- > 0 && hp != NULL; hp = hp->Hnext)
1125 	    phist(hp, hflg);
1126     }
1127 }
1128 
1129 /*ARGSUSED*/
1130 void
1131 dohist(Char **vp, struct command *c)
1132 {
1133     int     n, hflg = 0;
1134 
1135     USE(c);
1136     if (getn(varval(STRhistory)) == 0)
1137 	return;
1138     while (*++vp && **vp == '-') {
1139 	Char   *vp2 = *vp;
1140 
1141 	while (*++vp2)
1142 	    switch (*vp2) {
1143 	    case 'c':
1144 		hflg |= HIST_CLEAR;
1145 		break;
1146 	    case 'h':
1147 		hflg |= HIST_ONLY;
1148 		break;
1149 	    case 'r':
1150 		hflg |= HIST_REV;
1151 		break;
1152 	    case 'S':
1153 		hflg |= HIST_SAVE;
1154 		break;
1155 	    case 'L':
1156 		hflg |= HIST_LOAD;
1157 		break;
1158 	    case 'M':
1159 	    	hflg |= HIST_MERGE;
1160 		break;
1161 	    case 'T':
1162 	    	hflg |= HIST_TIME;
1163 		break;
1164 	    default:
1165 		stderror(ERR_HISTUS, "chrSLMT");
1166 		break;
1167 	    }
1168     }
1169     if (hflg & HIST_CLEAR) {
1170         struct Hist *np, *hp;
1171         for (hp = &Histlist; (np = hp->Hnext) != NULL;)
1172             hremove(np), hfree(np);
1173     }
1174 
1175     if (hflg & (HIST_LOAD | HIST_MERGE))
1176 	loadhist(*vp, (hflg & HIST_MERGE) ? 1 : 0);
1177     else if (hflg & HIST_SAVE)
1178 	rechist(*vp, 1);
1179     else {
1180 	if (*vp)
1181 	    n = getn(*vp);
1182 	else {
1183 	    n = getn(varval(STRhistory));
1184 	}
1185 	dophist(n, hflg);
1186     }
1187 }
1188 
1189 
1190 char *
1191 fmthist(int fmt, ptr_t ptr)
1192 {
1193     struct Hist *hp = ptr;
1194     char *buf;
1195 
1196     switch (fmt) {
1197     case 'h':
1198 	return xasprintf("%6d", hp->Hnum);
1199     case 'R':
1200 	if (HistLit && hp->histline)
1201 	    return xasprintf("%S", hp->histline);
1202 	else {
1203 	    Char *istr, *ip;
1204 	    char *p;
1205 
1206 	    istr = sprlex(&hp->Hlex);
1207 	    buf = xmalloc(Strlen(istr) * MB_LEN_MAX + 1);
1208 
1209 	    for (p = buf, ip = istr; *ip != '\0'; ip++)
1210 		p += one_wctomb(p, CHAR & *ip);
1211 
1212 	    *p = '\0';
1213 	    xfree(istr);
1214 	    return buf;
1215 	}
1216     default:
1217 	buf = xmalloc(1);
1218 	buf[0] = '\0';
1219 	return buf;
1220     }
1221 }
1222 
1223 /* Save history before exiting the shell. */
1224 void
1225 rechist(Char *fname, int ref)
1226 {
1227     Char    *snum;
1228     int     fp, ftmp, oldidfds;
1229     struct varent *shist;
1230     static Char   *dumphist[] = {STRhistory, STRmhT, 0, 0};
1231 
1232     if (fname == NULL && !ref)
1233 	return;
1234     /*
1235      * If $savehist is just set, we use the value of $history
1236      * else we use the value in $savehist
1237      */
1238     if (((snum = varval(STRsavehist)) == STRNULL) &&
1239 	((snum = varval(STRhistory)) == STRNULL))
1240 	snum = STRmaxint;
1241 
1242 
1243     if (fname == NULL) {
1244 	if ((fname = varval(STRhistfile)) == STRNULL)
1245 	    fname = Strspl(varval(STRhome), &STRtildothist[1]);
1246 	else
1247 	    fname = Strsave(fname);
1248     }
1249     else
1250 	fname = globone(fname, G_ERROR);
1251     cleanup_push(fname, xfree);
1252 
1253     /*
1254      * The 'savehist merge' feature is intended for an environment
1255      * with numerous shells being in simultaneous use. Imagine
1256      * any kind of window system. All these shells 'share' the same
1257      * ~/.history file for recording their command line history.
1258      * Currently the automatic merge can only succeed when the shells
1259      * nicely quit one after another.
1260      *
1261      * Users that like to nuke their environment require here an atomic
1262      * 	loadhist-creat-dohist(dumphist)-close
1263      * sequence.
1264      *
1265      * jw.
1266      */
1267     /*
1268      * We need the didfds stuff before loadhist otherwise
1269      * exec in a script will fail to print if merge is set.
1270      * From: mveksler@iil.intel.com (Veksler Michael)
1271      */
1272     oldidfds = didfds;
1273     didfds = 0;
1274     if ((shist = adrof(STRsavehist)) != NULL && shist->vec != NULL)
1275 	if (shist->vec[1] && eq(shist->vec[1], STRmerge))
1276 	    loadhist(fname, 1);
1277 
1278     fp = xcreat(short2str(fname), 0600);
1279     cleanup_until(fname);
1280     if (fp == -1) {
1281 	didfds = oldidfds;
1282 	return;
1283     }
1284     ftmp = SHOUT;
1285     SHOUT = fp;
1286     dumphist[2] = snum;
1287     dohist(dumphist, NULL);
1288     xclose(fp);
1289     SHOUT = ftmp;
1290     didfds = oldidfds;
1291 }
1292 
1293 
1294 /* This is the entry point for loading history data from a file. */
1295 void
1296 loadhist(Char *fname, int mflg)
1297 {
1298     static Char   *loadhist_cmd[] = {STRsource, NULL, NULL, NULL};
1299     loadhist_cmd[1] = mflg ? STRmm : STRmh;
1300 
1301     if (fname != NULL)
1302 	loadhist_cmd[2] = fname;
1303     else if ((fname = varval(STRhistfile)) != STRNULL)
1304 	loadhist_cmd[2] = fname;
1305     else
1306 	loadhist_cmd[2] = STRtildothist;
1307 
1308     dosource(loadhist_cmd, NULL);
1309 
1310     /* During history merging (enthist sees mflg set), we disable management of
1311      * Hnum and Href (because fastMergeErase is true).  So now reset all the
1312      * values based on the final ordering of the history list. */
1313     if (mflg) {
1314 	int n = eventno;
1315         struct Hist *hp = &Histlist;
1316         while ((hp = hp->Hnext))
1317 	    hp->Hnum = hp->Href = n--;
1318     }
1319 }
1320