xref: /titanic_41/usr/src/tools/aw/aw.c (revision 299d957d28501350bcae7fa4333ff183c062c427)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 /*
29  * Wrapper for the GNU assembler to make it accept the Sun assembler
30  * arguments where possible.
31  *
32  * There are several limitations; the Sun assembler takes multiple
33  * source files, we only take one.
34  *
35  * -b, -s, -xF, -T plain not supported.
36  * -S isn't supported either, because while GNU as does generate
37  * listings with -a, there's no obvious mapping between sub-options.
 * -K pic, -K PIC are accepted but ignored (nothing is passed on to
 * gas), since it's not clear what these actually do ..
40  * -Qy (not supported) adds a string to the .comment section
41  * describing the assembler version, while
42  * -Qn (supported) suppresses the string (also the default).
43  *
44  * We also add '-#' support to see invocation lines..
45  * We also add '-xarch=amd64' in case we need to feed the assembler
46  * something different (or in case we need to invoke a different binary
47  * altogether!)
48  */
49 
50 #include <sys/types.h>
51 #include <sys/wait.h>
52 #include <stdio.h>
53 #include <unistd.h>
54 #include <string.h>
55 #include <stdlib.h>
56 #include <sys/param.h>
57 
/* Incremented once per '-#' option; non-zero echoes the commands run. */
static int verbose;

/* Last path component of argv[0]; prefixes our diagnostics. */
static const char *progname;
60 
/*
 * One argument: a node on a singly-linked list.
 */
struct ae {
	struct ae *ae_next;	/* following argument, NULL on the last one */
	char *ae_arg;		/* text of the argument */
};

/*
 * An argument vector under construction.  Arguments are appended at the
 * tail, so walking from the head yields them in command-line order.
 */
struct aelist {
	int ael_argc;		/* number of arguments on the list */
	struct ae *ael_head;	/* first argument (the program name) */
	struct ae *ael_tail;	/* most recently appended argument */
};
68 
69 static struct aelist *
70 newael(void)
71 {
72 	return (calloc(sizeof (struct aelist), 1));
73 }
74 
75 static void
76 newae(struct aelist *ael, const char *arg)
77 {
78 	struct ae *ae;
79 
80 	ae = calloc(sizeof (*ae), 1);
81 	ae->ae_arg = strdup(arg);
82 	if (ael->ael_tail == NULL)
83 		ael->ael_head = ae;
84 	else
85 		ael->ael_tail->ae_next = ae;
86 	ael->ael_tail = ae;
87 	ael->ael_argc++;
88 }
89 
90 static void
91 fixae_arg(struct ae *ae, const char *newarg)
92 {
93 	free(ae->ae_arg);
94 	ae->ae_arg = strdup(newarg);
95 }
96 
97 static char **
98 aeltoargv(struct aelist *ael)
99 {
100 	struct ae *ae;
101 	char **argv;
102 	int argc;
103 
104 	argv = calloc(sizeof (*argv), ael->ael_argc + 1);
105 
106 	for (argc = 0, ae = ael->ael_head; ae; ae = ae->ae_next, argc++) {
107 		argv[argc] = ae->ae_arg;
108 		if (ae == ael->ael_tail)
109 			break;
110 	}
111 
112 	return (argv);
113 }
114 
115 static int
116 error(const char *arg)
117 {
118 	(void) fprintf(stderr,
119 	    "%s: as->gas mapping failed at or near arg '%s'\n", progname, arg);
120 	return (2);
121 }
122 
123 static int
124 usage(const char *arg)
125 {
126 	if (arg != NULL)
127 		(void) fprintf(stderr, "error: %s\n", arg);
128 	(void) fprintf(stderr, "Usage: %s [-V] [-#]\n"
129 	    "\t[-xarch=architecture]\n"
130 	    "\t[-o objfile] [-L]\n"
131 	    "\t[-P [[-Ipath] [-Dname] [-Dname=def] [-Uname]]...]\n"
132 	    "\t[-m] [-n] file.s ...\n", progname);
133 	return (3);
134 }
135 
/*
 * Consume characters from 'in' up to and including the first occurrence
 * of 'termchar', or until end of file.  Each character consumed (the
 * terminator included) is also written to 'out', unless 'out' is NULL,
 * in which case the input is simply discarded.
 *
 * A write failure terminates the process with status 1.
 */
static void
copyuntil(FILE *in, FILE *out, int termchar)
{
	for (;;) {
		int ch = fgetc(in);

		if (ch == EOF)
			return;
		if (out != NULL && fputc(ch, out) == EOF)
			exit(1);
		if (ch == termchar)
			return;
	}
}
148 
/*
 * Path-aware variant of copyuntil(), used for the pathname in .file
 * directives.
 *
 * gas records every .file directive as an STT_FILE symbol, so absolute
 * workspace paths end up in the object and make wsdiff report changes
 * that are not real.  To avoid that, the path is read in full (up to
 * and including 'termchar', or to end of file) and then trimmed before
 * being written to 'out':
 *
 *   - if it begins with the workspace path 'wspace' (of length
 *     'wspace_len'), that prefix is dropped, and if what remains begins
 *     with the proto area's /usr/include prefix, that is dropped too;
 *   - otherwise, a leading "/usr/include/" is dropped.
 *
 * A prefix is only removed when something is left over afterwards.
 * Nothing is written when no input could be read, or when 'out' is NULL.
 * Allocation and write failures terminate the process with status 1.
 */
static void
copyuntil_path(FILE *in, FILE *out, int termchar,
    const char *wspace, size_t wspace_len)
{
	static const char proto_inc[] = "/proto/root_i386/usr/include/";
	static const char sys_inc[] = "/usr/include/";
	const size_t proto_inc_len = sizeof (proto_inc) - 1;
	const size_t sys_inc_len = sizeof (sys_inc) - 1;

	/* Growable scratch buffer, kept across calls and never freed. */
	static char	*pathbuf = NULL;
	static size_t	pathcap = 0;

	char	*start;
	size_t	len = 0;
	int	ch;

	/* Gather the path, terminator included, into the buffer. */
	for (;;) {
		ch = fgetc(in);
		if (ch == EOF)
			break;

		/* First use, or buffer full: start at MAXPATHLEN, then double. */
		if (len >= pathcap) {
			pathcap = (pathcap == 0) ? MAXPATHLEN : (pathcap * 2);
			/* one spare byte for the trailing NUL */
			pathbuf = realloc(pathbuf, pathcap + 1);
			if (pathbuf == NULL) {
				perror("realloc");
				exit(1);
			}
		}

		pathbuf[len++] = ch;
		if (ch == termchar)
			break;
	}

	if (len == 0)
		return;

	pathbuf[len] = '\0';
	start = pathbuf;

	if (wspace_len > 0 && wspace_len < len &&
	    strncmp(start, wspace, wspace_len) == 0) {
		/* Inside the workspace: drop the workspace prefix ... */
		start += wspace_len;
		len -= wspace_len;

		/* ... and the route to the proto area's headers, if any. */
		if (proto_inc_len < len &&
		    strncmp(start, proto_inc, proto_inc_len) == 0) {
			start += proto_inc_len;
			len -= proto_inc_len;
		}
	} else if (sys_inc_len < len &&
	    strncmp(start, sys_inc, sys_inc_len) == 0) {
		/* Outside the workspace: drop the system header prefix. */
		start += sys_inc_len;
		len -= sys_inc_len;
	}

	/* Emit what survived the trimming. */
	if (out != NULL && fwrite(start, 1, len, out) != len) {
		perror("fwrite");
		exit(1);
	}
}
243 
244 /*
245  * The idea here is to take directives like this emitted
246  * by cpp:
247  *
248  *	# num
249  *
250  * and convert them to directives like this that are
251  * understood by the GNU assembler:
252  *
253  *	.line num
254  *
255  * and similarly:
256  *
257  *	# num "string" optional stuff
258  *
259  * is converted to
260  *
261  *	.line num
262  *	.file "string"
263  *
264  * While this could be done with a sequence of sed
265  * commands, this is simpler and faster..
266  */
267 static pid_t
268 filter(int pipein, int pipeout)
269 {
270 	pid_t pid;
271 	FILE *in, *out;
272 	char *wspace;
273 	size_t wspace_len;
274 
275 	if (verbose)
276 		(void) fprintf(stderr, "{#line filter} ");
277 
278 	switch (pid = fork()) {
279 	case 0:
280 		if (dup2(pipein, 0) == -1 ||
281 		    dup2(pipeout, 1) == -1) {
282 			perror("dup2");
283 			exit(1);
284 		}
285 		closefrom(3);
286 		break;
287 	case -1:
288 		perror("fork");
289 	default:
290 		return (pid);
291 	}
292 
293 	in = fdopen(0, "r");
294 	out = fdopen(1, "w");
295 
296 	/*
297 	 * Key off the CODEMGR_WS environment variable to detect
298 	 * if we're in an activated workspace, and to get the
299 	 * path to the workspace.
300 	 */
301 	wspace = getenv("CODEMGR_WS");
302 	if (wspace != NULL)
303 		wspace_len = strlen(wspace);
304 
305 	while (!feof(in)) {
306 		int c, num;
307 
308 		switch (c = fgetc(in)) {
309 		case '#':
310 			switch (fscanf(in, " %d", &num)) {
311 			case 0:
312 				/*
313 				 * discard comment lines completely
314 				 * discard ident strings completely too.
315 				 * (GNU as politely ignores them..)
316 				 */
317 				copyuntil(in, NULL, '\n');
318 				break;
319 			default:
320 				(void) fprintf(stderr, "fscanf botch?");
321 				/*FALLTHROUGH*/
322 			case EOF:
323 				exit(1);
324 				/*NOTREACHED*/
325 			case 1:
326 				/*
327 				 * This line has a number at the beginning;
328 				 * if it has a string after the number, then
329 				 * it's a filename.
330 				 *
331 				 * If this is an activated workspace, use
332 				 * copyuntil_path() to do path rewriting
333 				 * that will prevent workspace paths from
334 				 * being burned into the resulting object.
335 				 * If not in an activated workspace, then
336 				 * copy the existing path straight through
337 				 * without interpretation.
338 				 */
339 				if (fgetc(in) == ' ' && fgetc(in) == '"') {
340 					(void) fprintf(out, "\t.file \"");
341 					if (wspace != NULL)
342 						copyuntil_path(in, out, '"',
343 						    wspace, wspace_len);
344 					else
345 						copyuntil(in, out, '"');
346 					(void) fputc('\n', out);
347 				}
348 				(void) fprintf(out, "\t.line %d\n", num - 1);
349 				/*
350 				 * discard the rest of the line
351 				 */
352 				copyuntil(in, NULL, '\n');
353 				break;
354 			}
355 			break;
356 		case '\n':
357 			/*
358 			 * preserve newlines
359 			 */
360 			(void) fputc(c, out);
361 			break;
362 		case EOF:
363 			/*
364 			 * don't write EOF!
365 			 */
366 			break;
367 		default:
368 			/*
369 			 * lines that don't begin with '#' are copied
370 			 */
371 			(void) fputc(c, out);
372 			copyuntil(in, out, '\n');
373 			break;
374 		}
375 
376 		if (ferror(out))
377 			exit(1);
378 	}
379 
380 	exit(0);
381 	/*NOTREACHED*/
382 }
383 
384 static pid_t
385 invoke(char **argv, int pipein, int pipeout)
386 {
387 	pid_t pid;
388 
389 	if (verbose) {
390 		char **dargv = argv;
391 
392 		while (*dargv)
393 			(void) fprintf(stderr, "%s ", *dargv++);
394 	}
395 
396 	switch (pid = fork()) {
397 	case 0:
398 		if (pipein >= 0 && dup2(pipein, 0) == -1) {
399 			perror("dup2");
400 			exit(1);
401 		}
402 		if (pipeout >= 0 && dup2(pipeout, 1) == -1) {
403 			perror("dup2");
404 			exit(1);
405 		}
406 		closefrom(3);
407 		(void) execvp(argv[0], argv);
408 		perror("execvp");
409 		(void) fprintf(stderr, "%s: couldn't run %s\n",
410 		    progname, argv[0]);
411 		break;
412 	case -1:
413 		perror("fork");
414 	default:
415 		return (pid);
416 	}
417 	exit(2);
418 	/*NOTREACHED*/
419 }
420 
421 static int
422 pipeline(char **ppargv, char **asargv)
423 {
424 	int pipedes[4];
425 	int active = 0;
426 	int rval = 0;
427 	pid_t pid_pp, pid_f, pid_as;
428 
429 	if (pipe(pipedes) == -1 || pipe(pipedes + 2) == -1) {
430 		perror("pipe");
431 		return (4);
432 	}
433 
434 	if ((pid_pp = invoke(ppargv, -1, pipedes[0])) > 0)
435 		active++;
436 
437 	if (verbose)
438 		(void) fprintf(stderr, "| ");
439 
440 	if ((pid_f = filter(pipedes[1], pipedes[2])) > 0)
441 		active++;
442 
443 	if (verbose)
444 		(void) fprintf(stderr, "| ");
445 
446 	if ((pid_as = invoke(asargv, pipedes[3], -1)) > 0)
447 		active++;
448 
449 	if (verbose) {
450 		(void) fprintf(stderr, "\n");
451 		(void) fflush(stderr);
452 	}
453 
454 	closefrom(3);
455 
456 	if (active != 3)
457 		return (5);
458 
459 	while (active != 0) {
460 		pid_t pid;
461 		int stat;
462 
463 		if ((pid = wait(&stat)) == -1) {
464 			rval++;
465 			break;
466 		}
467 
468 		if (!WIFEXITED(stat))
469 			continue;
470 
471 		if (pid == pid_pp || pid == pid_f || pid == pid_as) {
472 			active--;
473 			if (WEXITSTATUS(stat) != 0)
474 				rval++;
475 		}
476 	}
477 
478 	return (rval);
479 }
480 
481 int
482 main(int argc, char *argv[])
483 {
484 	struct aelist *cpp = NULL;
485 	struct aelist *m4 = NULL;
486 	struct aelist *as = newael();
487 	char **asargv;
488 	char *outfile = NULL;
489 	char *srcfile = NULL;
490 	const char *as_dir, *as64_dir, *m4_dir, *m4_lib_dir, *cpp_dir;
491 	char *as_pgm, *as64_pgm, *m4_pgm, *m4_cmdefs, *cpp_pgm;
492 	size_t bufsize;
493 	int as64 = 0;
494 	int code;
495 
496 	if ((progname = strrchr(argv[0], '/')) == NULL)
497 		progname = argv[0];
498 	else
499 		progname++;
500 
501 	/*
502 	 * Helpful when debugging, or when changing tool versions..
503 	 */
504 	if ((as_dir = getenv("AW_AS_DIR")) == NULL)
505 		as_dir = DEFAULT_AS_DIR;	/* /usr/sfw/bin */
506 	bufsize = strlen(as_dir) + strlen("/gas") + 1;
507 	as_pgm = malloc(bufsize);
508 	(void) snprintf(as_pgm, bufsize, "%s/gas", as_dir);
509 
510 	if ((as64_dir = getenv("AW_AS64_DIR")) == NULL)
511 		as64_dir = DEFAULT_AS64_DIR;	/* /usr/sfw/bin */
512 	bufsize = strlen(as64_dir) + strlen("/gas") + 1;
513 	as64_pgm = malloc(bufsize);
514 	(void) snprintf(as64_pgm, bufsize, "%s/gas", as64_dir);
515 
516 	if ((m4_dir = getenv("AW_M4_DIR")) == NULL)
517 		m4_dir = DEFAULT_M4_DIR;	/* /usr/ccs/bin */
518 	bufsize = strlen(m4_dir) + strlen("/m4") + 1;
519 	m4_pgm = malloc(bufsize);
520 	(void) snprintf(m4_pgm, bufsize, "%s/m4", m4_dir);
521 
522 	if ((m4_lib_dir = getenv("AW_M4LIB_DIR")) == NULL)
523 		m4_lib_dir = DEFAULT_M4LIB_DIR;	/* /usr/ccs/lib */
524 	bufsize = strlen(m4_lib_dir) + strlen("/cmdefs") + 1;
525 	m4_cmdefs = malloc(bufsize);
526 	(void) snprintf(m4_cmdefs, bufsize, "%s/cmdefs", m4_lib_dir);
527 
528 	if ((cpp_dir = getenv("AW_CPP_DIR")) == NULL)
529 		cpp_dir = DEFAULT_CPP_DIR;	/* /usr/ccs/lib */
530 	bufsize = strlen(cpp_dir) + strlen("/cpp") + 1;
531 	cpp_pgm = malloc(bufsize);
532 	(void) snprintf(cpp_pgm, bufsize, "%s/cpp", cpp_dir);
533 
534 	newae(as, as_pgm);
535 	newae(as, "--warn");
536 	newae(as, "--fatal-warnings");
537 	newae(as, "--traditional-format");
538 
539 	/*
540 	 * This is a support hack to rewrite code for the compiler
541 	 * which should probably cause an assembler programmer to recode
542 	 * - so, generate a warning in this case.
543 	 */
544 	newae(as, "-K");
545 
546 	/*
547 	 * Walk the argument list, translating as we go ..
548 	 */
549 	while (--argc > 0) {
550 		char *arg;
551 		int arglen;
552 
553 		arg = *++argv;
554 		arglen = strlen(arg);
555 
556 		if (*arg != '-') {
557 			char *filename;
558 
559 			/*
560 			 * filenames ending in '.s' are taken to be
561 			 * assembler files, and provide the default
562 			 * basename of the output file.
563 			 *
564 			 * other files are passed through to the
565 			 * preprocessor, if present, or to gas if not.
566 			 */
567 			filename = arg;
568 			if (arglen > 2 &&
569 			    strcmp(arg + arglen - 2, ".s") == 0) {
570 				/*
571 				 * Though 'as' allows multiple assembler
572 				 * files to be processed in one invocation
573 				 * of the assembler, ON only processes one
574 				 * file at a time, which makes things a lot
575 				 * simpler!
576 				 */
577 				if (srcfile == NULL)
578 					srcfile = arg;
579 				else
580 					return (usage(
581 					    "one assembler file at a time"));
582 
583 				/*
584 				 * If we haven't seen a -o option yet,
585 				 * default the output to the basename
586 				 * of the input, substituting a .o on the end
587 				 */
588 				if (outfile == NULL) {
589 					char *argcopy;
590 
591 					argcopy = strdup(arg);
592 					argcopy[arglen - 1] = 'o';
593 
594 					if ((outfile = strrchr(
595 					    argcopy, '/')) == NULL)
596 						outfile = argcopy;
597 					else
598 						outfile++;
599 				}
600 			}
601 			if (cpp)
602 				newae(cpp, filename);
603 			else if (m4)
604 				newae(m4, filename);
605 			else
606 				newae(as, filename);
607 			continue;
608 		} else
609 			arglen--;
610 
611 		switch (arg[1]) {
612 		case 'K':
613 			/*
614 			 * -K pic
615 			 * -K PIC
616 			 */
617 			if (arglen == 1) {
618 				if ((arg = *++argv) == NULL || *arg == '\0')
619 					return (usage("malformed -K"));
620 				argc--;
621 			} else {
622 				arg += 2;
623 			}
624 			if (strcmp(arg, "PIC") != 0 && strcmp(arg, "pic") != 0)
625 				return (usage("malformed -K"));
626 			break;		/* just ignore -Kpic for gcc */
627 		case 'Q':
628 			if (strcmp(arg, "-Qn") == 0)
629 				break;
630 			/*FALLTHROUGH*/
631 		case 'b':
632 		case 's':
633 		case 'T':
634 			/*
635 			 * -b	Extra symbol table for source browser ..
636 			 *	not relevant to gas, thus should error.
637 			 * -s	Put stabs in .stabs section not stabs.excl
638 			 *	not clear if there's an equivalent
639 			 * -T	4.x migration option
640 			 */
641 		default:
642 			return (error(arg));
643 		case 'x':
644 			/*
645 			 * Accept -xarch special case to invoke alternate
646 			 * assemblers or assembler flags for different
647 			 * architectures.
648 			 */
649 			if (strcmp(arg, "-xarch=amd64") == 0 ||
650 			    strcmp(arg, "-xarch=generic64") == 0) {
651 				as64++;
652 				fixae_arg(as->ael_head, as64_pgm);
653 				break;
654 			}
655 			/*
656 			 * XX64: Is this useful to gas?
657 			 */
658 			if (strcmp(arg, "-xmodel=kernel") == 0)
659 				break;
660 
661 			/*
662 			 * -xF	Generates performance analysis data
663 			 *	no equivalent
664 			 */
665 			return (error(arg));
666 		case 'V':
667 			newae(as, arg);
668 			break;
669 		case '#':
670 			verbose++;
671 			break;
672 		case 'L':
673 			newae(as, "--keep-locals");
674 			break;
675 		case 'n':
676 			newae(as, "--no-warn");
677 			break;
678 		case 'o':
679 			if (arglen != 1)
680 				return (usage("bad -o flag"));
681 			if ((arg = *++argv) == NULL || *arg == '\0')
682 				return (usage("bad -o flag"));
683 			outfile = arg;
684 			argc--;
685 			arglen = strlen(arg + 1);
686 			break;
687 		case 'm':
688 			if (cpp)
689 				return (usage("-m conflicts with -P"));
690 			if (m4 == NULL) {
691 				m4 = newael();
692 				newae(m4, m4_pgm);
693 				newae(m4, m4_cmdefs);
694 			}
695 			break;
696 		case 'P':
697 			if (m4)
698 				return (usage("-P conflicts with -m"));
699 			if (cpp == NULL) {
700 				cpp = newael();
701 				newae(cpp, cpp_pgm);
702 				newae(cpp, "-D__GNUC_AS__");
703 			}
704 			break;
705 		case 'D':
706 		case 'U':
707 			if (cpp)
708 				newae(cpp, arg);
709 			else if (m4)
710 				newae(m4, arg);
711 			else
712 				newae(as, arg);
713 			break;
714 		case 'I':
715 			if (cpp)
716 				newae(cpp, arg);
717 			else
718 				newae(as, arg);
719 			break;
720 		case '-':	/* a gas-specific option */
721 			newae(as, arg);
722 			break;
723 		}
724 	}
725 
726 #if defined(__i386)
727 	if (as64)
728 		newae(as, "--64");
729 	else
730 		newae(as, "--32");
731 #endif
732 
733 	if (srcfile == NULL)
734 		return (usage("no source file(s) specified"));
735 	if (outfile == NULL)
736 		outfile = "a.out";
737 	newae(as, "-o");
738 	newae(as, outfile);
739 
740 	asargv = aeltoargv(as);
741 	if (cpp) {
742 #if defined(__sparc)
743 		newae(cpp, "-Dsparc");
744 		newae(cpp, "-D__sparc");
745 		if (as64)
746 			newae(cpp, "-D__sparcv9");
747 		else
748 			newae(cpp, "-D__sparcv8");
749 #elif defined(__i386) || defined(__x86)
750 		if (as64) {
751 			newae(cpp, "-D__x86_64");
752 			newae(cpp, "-D__amd64");
753 		} else {
754 			newae(cpp, "-Di386");
755 			newae(cpp, "-D__i386");
756 		}
757 #else
758 #error	"need isa-dependent defines"
759 #endif
760 		code = pipeline(aeltoargv(cpp), asargv);
761 	} else if (m4)
762 		code = pipeline(aeltoargv(m4), asargv);
763 	else {
764 		/*
765 		 * XXX	should arrange to fork/exec so that we
766 		 *	can unlink the output file if errors are
767 		 *	detected..
768 		 */
769 		(void) execvp(asargv[0], asargv);
770 		perror("execvp");
771 		(void) fprintf(stderr, "%s: couldn't run %s\n",
772 		    progname, asargv[0]);
773 		code = 7;
774 	}
775 	if (code != 0)
776 		(void) unlink(outfile);
777 	return (code);
778 }
779