xref: /illumos-gate/usr/src/tools/cscope-fast/crossref.c (revision d48be21240dfd051b689384ce2b23479d757f2d8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*	Copyright (c) 1988 AT&T	*/
23 /*	  All Rights Reserved  	*/
24 
25 
26 /*
27  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 /*
32  *	cscope - interactive C symbol cross-reference
33  *
34  *	build cross-reference file
35  */
36 
37 #include "global.h"
38 
/*
 * convert long to a string
 *
 * Encodes the non-negative number `value' in base BASE, using the
 * characters starting at '!' as digits ('!' is zero).
 *
 * The macro works on variables that must exist in the calling scope:
 *	buf	- char array that receives the digits, filled from its end
 *	s	- char pointer; on completion points at the first digit of
 *		  the NUL-terminated result inside buf
 *	n, i	- integer scratch variables (both are overwritten)
 *	digits	- int; on completion holds the number of digits produced
 *
 * The body is wrapped in do { } while (0) so that `ltobase(x);' is a single
 * statement and can be used safely as the body of an unbraced if/else or
 * loop; the argument is parenthesized so any expression may be passed.
 */
#define	ltobase(value)	do { \
				n = (value); \
				s = buf + (sizeof (buf) - 1); \
				*s = '\0'; \
				digits = 1; \
				while (n >= BASE) { \
					++digits; \
					i = n; \
					n /= BASE; \
					*--s = i - n * BASE + '!'; \
				} \
				*--s = n + '!'; \
			} while (0)
51 
52 #define	SYMBOLINC	20	/* symbol list size increment */
53 #define	FREAD	"r"		/* fopen for reading */
54 
55 long	dboffset;		/* new database offset */
56 BOOL	errorsfound;		/* prompt before clearing messages */
57 long	fileindex;		/* source file name index */
58 long	lineoffset;		/* source line database offset */
59 long	npostings;		/* number of postings */
60 int	nsrcoffset;		/* number of file name database offsets */
61 long	*srcoffset;		/* source file name database offsets */
62 int	symbols;		/* number of symbols */
63 
64 static	char	*filename;	/* file name for warning messages */
65 static	long	fcnoffset;	/* function name database offset */
66 static	long	macrooffset;	/* macro name database offset */
67 static	int	msymbols = SYMBOLINC;	/* maximum number of symbols */
68 static	struct	symbol {	/* symbol data */
69 	int	type;		/* type */
70 	int	first;		/* index of first character in text */
71 	int	last;		/* index of last+1 character in text */
72 	int	length;		/* symbol length */
73 } *symbol;
74 
75 static void putcrossref(void);
76 
77 void
78 crossref(char *srcfile)
79 {
80 	int	i;
81 	int	length;		/* symbol length */
82 	int	token;			/* current token */
83 
84 	/* open the source file */
85 	if ((yyin = vpfopen(srcfile, FREAD)) == NULL) {
86 		cannotopen(srcfile);
87 		errorsfound = YES;
88 		return;
89 	}
90 	filename = srcfile;	/* save the file name for warning messages */
91 	putfilename(srcfile);	/* output the file name */
92 	dbputc('\n');
93 	dbputc('\n');
94 
95 	/* read the source file */
96 	initscanner(srcfile);
97 	fcnoffset = macrooffset = 0;
98 	symbols = 0;
99 	if (symbol == NULL) {
100 		symbol = mymalloc(msymbols * sizeof (struct symbol));
101 	}
102 	for (;;) {
103 
104 		/* get the next token */
105 		switch (token = yylex()) {
106 		default:
107 			/* if requested, truncate C symbols */
108 			length = last - first;
109 			if (truncatesyms && length > 8 &&
110 			    token != INCLUDE && token != NEWFILE) {
111 				length = 8;
112 				last = first + 8;
113 			}
114 			/* see if the token has a symbol */
115 			if (length == 0) {
116 				savesymbol(token);
117 				break;
118 			}
119 			/* see if the symbol is already in the list */
120 			for (i = 0; i < symbols; ++i) {
121 				if (length == symbol[i].length &&
122 				    strncmp(yytext + first, yytext +
123 					symbol[i].first, length) == 0 &&
124 				    (token == IDENT ||
125 					token == symbol[i].type)) {
126 					first = yyleng;
127 					break;
128 				}
129 			}
130 			if (i == symbols) {	/* if not already in list */
131 				savesymbol(token);
132 			}
133 			break;
134 
135 		case NEWLINE:	/* end of line containing symbols */
136 			--yyleng;	/* remove the newline */
137 			putcrossref();	/* output the symbols and source line */
138 			lineno = yylineno; /* save the symbol line number */
139 			break;
140 
141 		case LEXEOF:	/* end of file; last line may not have \n */
142 
143 			/*
144 			 * if there were symbols, output them and the
145 			 * source line
146 			 */
147 			if (symbols > 0) {
148 				putcrossref();
149 			}
150 			(void) fclose(yyin);	/* close the source file */
151 
152 			/* output the leading tab expected by the next call */
153 			dbputc('\t');
154 			return;
155 		}
156 	}
157 }
158 
159 /* save the symbol in the list */
160 
161 void
162 savesymbol(int token)
163 {
164 	/* make sure there is room for the symbol */
165 	if (symbols == msymbols) {
166 		msymbols += SYMBOLINC;
167 		symbol = (struct symbol *)myrealloc(symbol,
168 		    msymbols * sizeof (struct symbol));
169 	}
170 	/* save the symbol */
171 	symbol[symbols].type = token;
172 	symbol[symbols].first = first;
173 	symbol[symbols].last = last;
174 	symbol[symbols].length = last - first;
175 	++symbols;
176 	first = yyleng;
177 }
178 
179 /* output the file name */
180 
181 void
182 putfilename(char *srcfile)
183 {
184 	/* check for file system out of space */
185 	/* note: dbputc is not used to avoid lint complaint */
186 	if (putc(NEWFILE, newrefs) == EOF) {
187 		cannotwrite(newreffile);
188 		/* NOTREACHED */
189 	}
190 	++dboffset;
191 	if (invertedindex) {
192 		srcoffset[nsrcoffset++] = dboffset;
193 	}
194 	dbfputs(srcfile);
195 	fcnoffset = macrooffset = 0;
196 }
197 
198 /* output the symbols and source line */
199 
200 static void
201 putcrossref(void)
202 {
203 	int	i, j;
204 	unsigned c;
205 	BOOL	blank = NO;	/* output blank */
206 	BOOL	newline = NO;	/* output newline */
207 	int	symput = 0;	/* symbols output */
208 	int	type;
209 
210 	/* output the source line */
211 	lineoffset = dboffset;
212 	dbfprintf(newrefs, "%d ", lineno);
213 	for (i = 0; i < yyleng; ++i) {
214 
215 		/* change a tab to a blank and compress blanks */
216 		if ((c = yytext[i]) == ' ' || c == '\t') {
217 			blank = YES;
218 		}
219 		/* look for the start of a symbol */
220 		else if (symput < symbols && i == symbol[symput].first) {
221 
222 			/* check for compressed blanks */
223 			if (blank) {
224 				blank = NO;
225 				if (newline) {
226 					dbputc('\n');
227 				}
228 				dbputc(' ');
229 			}
230 			dbputc('\n');	/* symbols start on a new line */
231 
232 			/* output any symbol type */
233 			if ((type = symbol[symput].type) != IDENT) {
234 				dbputc('\t');
235 				dbputc(type);
236 			} else {
237 				type = ' ';
238 			}
239 			/* output the symbol */
240 			j = symbol[symput].last;
241 			c = yytext[j];
242 			yytext[j] = '\0';
243 			if (invertedindex) {
244 				putposting(yytext + i, type);
245 			}
246 			putstring(yytext + i);
247 			newline = YES;
248 			yytext[j] = (char)c;
249 			i = j - 1;
250 			++symput;
251 		} else {
252 			if (newline) {
253 				newline = NO;
254 				dbputc('\n');
255 			}
256 			/* check for compressed blanks */
257 			if (blank) {
258 				if (dicode2[c]) {
259 					c = (0200 - 2) + dicode1[' '] +
260 					    dicode2[c];
261 				} else {
262 					dbputc(' ');
263 				}
264 			} else if (dicode1[c] &&
265 			    (j = dicode2[(unsigned)yytext[i + 1]]) != 0 &&
266 			    symput < symbols && i + 1 != symbol[symput].first) {
267 				/* compress digraphs */
268 				c = (0200 - 2) + dicode1[c] + j;
269 				++i;
270 			}
271 			/*
272 			 * if the last line of the file is a '}' without a
273 			 * newline, the lex EOF code overwrites it with a 0
274 			 */
275 			if (c) {
276 				dbputc((int)c);
277 			} else {
278 				dbputc(' ');
279 			}
280 			blank = NO;
281 
282 			/* skip compressed characters */
283 			if (c < ' ') {
284 				++i;
285 
286 				/* skip blanks before a preprocesor keyword */
287 				/*
288 				 * note: don't use isspace() because \f and \v
289 				 * are used for keywords
290 				 */
291 				while ((j = yytext[i]) == ' ' || j == '\t') {
292 					++i;
293 				}
294 				/* skip the rest of the keyword */
295 				while (isalpha(yytext[i])) {
296 					++i;
297 				}
298 				/* skip space after certain keywords */
299 				if (keyword[c].delim != '\0') {
300 					while ((j = yytext[i]) == ' ' ||
301 					    j == '\t') {
302 						++i;
303 					}
304 				}
305 				/* skip a '(' after certain keywords */
306 				if (keyword[c].delim == '(' &&
307 				    yytext[i] == '(') {
308 					++i;
309 				}
310 				--i;	/* compensate for ++i in for() */
311 			}
312 		}
313 	}
314 	/* ignore trailing blanks */
315 	dbputc('\n');
316 	dbputc('\n');
317 
318 	/* output any #define end marker */
319 	/*
320 	 * note: must not be part of #define so putsource() doesn't discard it
321 	 * so findcalledbysub() can find it and return
322 	 */
323 	if (symput < symbols && symbol[symput].type == DEFINEEND) {
324 		dbputc('\t');
325 		dbputc(DEFINEEND);
326 		dbputc('\n');
327 		dbputc('\n');	/* mark beginning of next source line */
328 		macrooffset = 0;
329 	}
330 	symbols = 0;
331 }
332 
333 /* output the inverted index posting */
334 
335 void
336 putposting(char *term, int type)
337 {
338 	long	i, n;
339 	char	*s;
340 	int	digits;		/* digits output */
341 	long	offset;		/* function/macro database offset */
342 	char	buf[11];		/* number buffer */
343 
344 	/* get the function or macro name offset */
345 	offset = fcnoffset;
346 	if (macrooffset != 0) {
347 		offset = macrooffset;
348 	}
349 	/* then update them to avoid negative relative name offset */
350 	switch (type) {
351 	case DEFINE:
352 		macrooffset = dboffset;
353 		break;
354 	case DEFINEEND:
355 		macrooffset = 0;
356 		return;		/* null term */
357 	case FCNDEF:
358 		fcnoffset = dboffset;
359 		break;
360 	case FCNEND:
361 		fcnoffset = 0;
362 		return;		/* null term */
363 	}
364 	/* ignore a null term caused by a enum/struct/union without a tag */
365 	if (*term == '\0') {
366 		return;
367 	}
368 	/* skip any #include secondary type char (< or ") */
369 	if (type == INCLUDE) {
370 		++term;
371 	}
372 	/*
373 	 * output the posting, which should be as small as possible to reduce
374 	 * the temp file size and sort time
375 	 */
376 	(void) fputs(term, postings);
377 	(void) putc(' ', postings);
378 
379 	/*
380 	 * the line offset is padded so postings for the same term will sort
381 	 * in ascending line offset order to order the references as they
382 	 * appear withing a source file
383 	 */
384 	ltobase(lineoffset);
385 	for (i = PRECISION - digits; i > 0; --i) {
386 		(void) putc('!', postings);
387 	}
388 	do {
389 		(void) putc(*s, postings);
390 	} while (*++s != '\0');
391 
392 	/* postings are also sorted by type */
393 	(void) putc(type, postings);
394 
395 	/* function or macro name offset */
396 	if (offset > 0) {
397 		(void) putc(' ', postings);
398 		ltobase(offset);
399 		do {
400 			(void) putc(*s, postings);
401 		} while (*++s != '\0');
402 	}
403 	if (putc('\n', postings) == EOF) {
404 		cannotwrite(temp1);
405 		/* NOTREACHED */
406 	}
407 	++npostings;
408 }
409 
410 /* put the string into the new database */
411 
412 void
413 putstring(char *s)
414 {
415 	unsigned c;
416 	int	i;
417 
418 	/* compress digraphs */
419 	for (i = 0; (c = s[i]) != '\0'; ++i) {
420 		if (dicode1[c] && dicode2[(unsigned)s[i + 1]]) {
421 			c = (0200 - 2) + dicode1[c] +
422 			    dicode2[(unsigned)s[i + 1]];
423 			++i;
424 		}
425 		dbputc((int)c);
426 	}
427 }
428 
429 /* print a warning message with the file name and line number */
430 
431 void
432 warning(text)
433 char	*text;
434 {
435 	extern	int	yylineno;
436 
437 	(void) fprintf(stderr, "cscope: \"%s\", line %d: warning: %s\n",
438 	    filename, yylineno, text);
439 	errorsfound = YES;
440 }
441