/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

#include <stdio.h>
#include <locale.h>
#include <assert.h>
#include <fcntl.h>
#include <libintl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif

extern void err();
extern int newkeys();
extern int recopy();
extern void whash();

/*
 * Open `path` with `mode`.  On failure the problem is reported through
 * err() and NULL is returned, so callers stay safe even if err() returns.
 */
static FILE *
open_checked(const char *path, const char *mode)
{
	FILE *fp = fopen(path, mode);

	if (fp == NULL)
		err(gettext("Can't open %s"), path);
	return (fp);
}

/*
 * Make inverted file indexes.  Reads a stream from mkey which
 * gives record pointer items and keys.  Generates set of files
 *	a. NHASH pointers to file b.
 *	b. lists of record numbers.
 *	c. record pointer items.
 *
 * these files are named xxx.ia, xxx.ib, xxx.ic;
 * where xxx is taken from arg1.
 * If the files exist they are updated.
 *
 * Options:
 *	-hN	size of hash table
 *	-n	new, don't append
 *	-a	append to old file (default)
 *	-v	verbose output
 *	-d	keep keys on file .id for check on searching
 *	-p	pipe into sort (saves space, costs time)
 *	-i F	input is on file F, not stdin (-iu F also unlinks F)
 *
 * Returns 0 on success and 1 when a file cannot be opened or an
 * argument is unusable.
 *
 * NOTE(review): err() is defined elsewhere and presumably reports and
 * exits; the explicit returns after each err() call guard against it
 * returning.
 */
int
main(int argc, char *argv[])
{
	FILE *fa, *fb, *fc, *fta, *ftb;
	FILE *fd = NULL;
	int nhash = 256;
	int appflg = 1;
	int keepkey = 0, pipein = 0;
	char nma[100], nmb[100], nmc[100], com[100], nmd[100];
	char tmpa[20], tmpb[20], tmpc[20];
	char *unlink_path = NULL;
	const char *base;
	const char *sortdir;
	int chatty = 0, docs, hashes = 0;
	long keys = 0;
	int iflong = 0;

	(void) setlocale(LC_ALL, "");
	(void) textdomain(TEXT_DOMAIN);

	sortdir = (access("/crp/tmp", R_OK | W_OK) == 0) ?
	    "/crp/tmp" : "/usr/tmp";

	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {
		case 'h':	/* size of hash table */
			nhash = atoi(argv[1] + 2);
			break;
		case 'n':	/* new, don't append */
			appflg = 0;
			break;
		case 'a':	/* append to old file */
			appflg = 1;
			break;
		case 'v':	/* verbose output */
			chatty = 1;
			break;
		case 'd':	/* keep keys on file .id for check on searching */
			keepkey = 1;
			break;
		case 'p':	/* pipe into sort (saves space, costs time) */
			pipein = 1;
			break;
		case 'i':	/* input is on file, not stdin */
			if (argc < 3) {
				err(gettext("Missing file name after %s"),
				    argv[1]);
				return (1);
			}
			/*
			 * Closing descriptor 0 first makes open() hand back
			 * descriptor 0, so the file becomes stdin.
			 */
			(void) close(0);
			if (open(argv[2], O_RDONLY) != 0) {
				err(gettext("Can't read input %s"), argv[2]);
				return (1);
			}
			if (argv[1][2] == 'u')	/* unlink */
				unlink_path = argv[2];
			argc--;
			argv++;
			break;
		default:	/* unknown options are silently ignored */
			break;
		}
		argc--;
		argv++;
	}

	/* Every output name is the base plus a three character suffix. */
	base = (argc >= 2) ? argv[1] : "Index";
	if (strlen(base) > sizeof (nma) - sizeof (".ia")) {
		err(gettext("Index name too long: %s"), base);
		return (1);
	}
	(void) snprintf(nma, sizeof (nma), "%s.ia", base);
	(void) snprintf(nmb, sizeof (nmb), "%s.ib", base);
	(void) snprintf(nmc, sizeof (nmc), "%s.ic", base);
	(void) snprintf(nmd, sizeof (nmd), "%s.id", base);

	(void) snprintf(tmpa, sizeof (tmpa), "junk%di", (int)getpid());
	if (pipein) {
		(void) snprintf(com, sizeof (com),
		    "/usr/bin/sort -T %s -o %s", sortdir, tmpa);
		fta = popen(com, "w");
	} else {	/* use tmp file */
		fta = fopen(tmpa, "w");
	}
	if (fta == NULL) {
		err(gettext("Can't get scratch file %s"), tmpa);
		return (1);
	}

	fb = NULL;
	if (appflg) {
		fb = fopen(nmb, "r");
		if (fb != NULL) {
			(void) snprintf(tmpb, sizeof (tmpb), "junk%dj",
			    (int)getpid());
			ftb = fopen(tmpb, "w");
			if (ftb == NULL) {
				err(gettext("Can't get scratch file %s"),
				    tmpb);
				return (1);
			}
			nhash = recopy(ftb, fb, fopen(nma, "r"));
			(void) fclose(ftb);
		} else {
			/* nothing to append to: build a fresh index */
			appflg = 0;
		}
	}

	fc = open_checked(nmc, appflg ? "a" : "w");
	if (fc == NULL)
		return (1);
	if (keepkey)
		fd = fopen(nmd, "w");

	docs = newkeys(fta, stdin, fc, nhash, fd, &iflong);
	(void) fclose(stdin);
	if (unlink_path != NULL)
		(void) unlink(unlink_path);

	/*
	 * A popen() stream must be closed with pclose() and only once;
	 * pclose() also waits for sort to finish writing tmpa.
	 */
	if (pipein) {
		(void) pclose(fta);
	} else {
		(void) fclose(fta);
		(void) snprintf(com, sizeof (com), "sort -T %s %s -o %s",
		    sortdir, tmpa, tmpa);
		(void) system(com);
	}

	if (appflg) {
		/* merge the new sorted keys with the recopied old ones */
		(void) snprintf(tmpc, sizeof (tmpc), "junk%dk",
		    (int)getpid());
		(void) rename(tmpa, tmpc);
		(void) snprintf(com, sizeof (com),
		    "sort -T %s -m %s %s -o %s", sortdir, tmpb, tmpc, tmpa);
		(void) system(com);
	}

	fta = open_checked(tmpa, "r");
	fa = open_checked(nma, "w");
	fb = open_checked(nmb, "w");
	if (fta == NULL || fa == NULL || fb == NULL)
		return (1);

	whash(fta, fa, fb, nhash, iflong, &keys, &hashes);
	(void) fclose(fta);
#ifndef D1
	(void) unlink(tmpa);
#endif
	if (appflg) {
		(void) unlink(tmpb);
		(void) unlink(tmpc);
	}
	if (chatty)
		printf(gettext("%ld key occurrences, %d hashes, %d docs\n"),
		    keys, hashes, docs);

	return (0);
}