xref: /illumos-gate/usr/src/cmd/cat/cat.c (revision 6d317d2f8bc347904716264ebe052812c3fc217a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
22 /*	  All Rights Reserved  	*/
23 
24 
25 /*
26  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
27  * Use is subject to license terms.
28  */
29 
30 /*
31  * Copyright (c) 2018, Joyent, Inc.
32  */
33 
34 /*
35  *	Concatenate files.
36  */
37 
38 #include	<stdio.h>
39 #include	<stdlib.h>
40 #include	<ctype.h>
41 #include	<sys/types.h>
42 #include	<sys/stat.h>
43 #include	<locale.h>
44 #include	<unistd.h>
45 #include	<sys/mman.h>
46 #include	<errno.h>
47 #include	<string.h>
48 
49 #include	<widec.h>
50 #include	<wctype.h>
51 #include	<limits.h>
52 #include	<libintl.h>
/*
 * True when two struct stat objects describe the same file, i.e. they
 * have the same device and inode numbers.  The arguments are
 * parenthesized so that expressions such as *ptr may be passed.
 */
#define	IDENTICAL(A, B)	\
	((A).st_dev == (B).st_dev && (A).st_ino == (B).st_ino)

#define	MAXMAPSIZE	(8*1024*1024)	/* map at most 8MB */
#define	SMALLFILESIZE	(32*1024)	/* don't use mmap on little files */

static int vncat(FILE *);
static int cat(FILE *, struct stat *, struct stat *, char *);

static int	silent = 0;		/* s flag */
static int	visi_mode = 0;		/* v flag */
static int	visi_tab = 0;		/* t flag */
static int	visi_newline = 0;	/* e flag */
static int	bflg = 0;		/* b flag */
static int	nflg = 0;		/* n flag */
static long	ibsize;			/* st_blksize of the current input */
static long	obsize;			/* st_blksize of stdout */
static unsigned	char	buf[SMALLFILESIZE];	/* buffer shared by cat/vncat */
70 
71 
72 int
73 main(int argc, char **argv)
74 {
75 	FILE *fi;
76 	int c;
77 	extern	int optind;
78 	int	errflg = 0;
79 	int	stdinflg = 0;
80 	int	status = 0;
81 	int	estatus = 0;
82 	struct stat source, target;
83 
84 	(void) setlocale(LC_ALL, "");
85 #if !defined(TEXT_DOMAIN)	/* Should be defined by cc -D */
86 #define	TEXT_DOMAIN "SYS_TEST"	/* Use this only if it weren't */
87 #endif
88 	(void) textdomain(TEXT_DOMAIN);
89 
90 #ifdef STANDALONE
91 	/*
92 	 * If the first argument is NULL,
93 	 * discard arguments until we find cat.
94 	 */
95 	if (argv[0][0] == '\0')
96 		argc = getargv("cat", &argv, 0);
97 #endif
98 
99 	/*
100 	 * Process the options for cat.
101 	 */
102 
103 	while ((c = getopt(argc, argv, "usvtebn")) != EOF) {
104 		switch (c) {
105 
106 		case 'u':
107 
108 			/*
109 			 * If not standalone, set stdout to
110 			 * completely unbuffered I/O when
111 			 * the 'u' option is used.
112 			 */
113 
114 #ifndef	STANDALONE
115 			setbuf(stdout, (char *)NULL);
116 #endif
117 			continue;
118 
119 		case 's':
120 
121 			/*
122 			 * The 's' option requests silent mode
123 			 * where no messages are written.
124 			 */
125 
126 			silent++;
127 			continue;
128 
129 		case 'v':
130 
131 			/*
132 			 * The 'v' option requests that non-printing
133 			 * characters (with the exception of newlines,
134 			 * form-feeds, and tabs) be displayed visibly.
135 			 *
136 			 * Control characters are printed as "^x".
137 			 * DEL characters are printed as "^?".
138 			 * Non-printable  and non-contrlol characters with the
139 			 * 8th bit set are printed as "M-x".
140 			 */
141 
142 			visi_mode++;
143 			continue;
144 
145 		case 't':
146 
147 			/*
148 			 * When in visi_mode, this option causes tabs
149 			 * to be displayed as "^I".
150 			 */
151 
152 			visi_tab++;
153 			continue;
154 
155 		case 'e':
156 
157 			/*
158 			 * When in visi_mode, this option causes newlines
159 			 * and form-feeds to be displayed as "$" at the end
160 			 * of the line prior to the newline.
161 			 */
162 
163 			visi_newline++;
164 			continue;
165 
166 		case 'b':
167 
168 			/*
169 			 * Precede each line output with its line number,
170 			 * but omit the line numbers from blank lines.
171 			 */
172 
173 			bflg++;
174 			nflg++;
175 			continue;
176 
177 		case 'n':
178 
179 			/*
180 			 * Precede each line output with its line number.
181 			 */
182 
183 			nflg++;
184 			continue;
185 
186 		case '?':
187 			errflg++;
188 			break;
189 		}
190 		break;
191 	}
192 
193 	if (errflg) {
194 		if (!silent)
195 			(void) fprintf(stderr,
196 			    gettext("usage: cat [ -usvtebn ] [-|file] ...\n"));
197 		exit(2);
198 	}
199 
200 	/*
201 	 * Stat stdout to be sure it is defined.
202 	 */
203 
204 	if (fstat(fileno(stdout), &target) < 0) {
205 		if (!silent)
206 			(void) fprintf(stderr,
207 			    gettext("cat: Cannot stat stdout\n"));
208 		exit(2);
209 	}
210 	obsize = target.st_blksize;
211 
212 	/*
213 	 * If no arguments given, then use stdin for input.
214 	 */
215 
216 	if (optind == argc) {
217 		argc++;
218 		stdinflg++;
219 	}
220 
221 	/*
222 	 * Process each remaining argument,
223 	 * unless there is an error with stdout.
224 	 */
225 
226 
227 	for (argv = &argv[optind];
228 	    optind < argc && !ferror(stdout); optind++, argv++) {
229 
230 		/*
231 		 * If the argument was '-' or there were no files
232 		 * specified, take the input from stdin.
233 		 */
234 
235 		if (stdinflg ||
236 		    ((*argv)[0] == '-' && (*argv)[1] == '\0'))
237 			fi = stdin;
238 		else {
239 			/*
240 			 * Attempt to open each specified file.
241 			 */
242 
243 			if ((fi = fopen(*argv, "r")) == NULL) {
244 				if (!silent)
245 					(void) fprintf(stderr, gettext(
246 					    "cat: cannot open %s: %s\n"),
247 					    *argv, strerror(errno));
248 				status = 2;
249 				continue;
250 			}
251 		}
252 
253 		/*
254 		 * Stat source to make sure it is defined.
255 		 */
256 
257 		if (fstat(fileno(fi), &source) < 0) {
258 			if (!silent)
259 				(void) fprintf(stderr,
260 				    gettext("cat: cannot stat %s: %s\n"),
261 				    (stdinflg) ? "-" : *argv, strerror(errno));
262 			status = 2;
263 			continue;
264 		}
265 
266 
267 		/*
268 		 * If the source is not a character special file, socket or a
269 		 * block special file, make sure it is not identical
270 		 * to the target.
271 		 */
272 
273 		if (!S_ISCHR(target.st_mode) &&
274 		    !S_ISBLK(target.st_mode) &&
275 		    !S_ISSOCK(target.st_mode) &&
276 		    IDENTICAL(target, source)) {
277 			if (!silent) {
278 				(void) fprintf(stderr, gettext("cat: "
279 				    "input/output files '%s' identical\n"),
280 				    stdinflg?"-": *argv);
281 			}
282 
283 			if (fclose(fi) != 0)
284 				(void) fprintf(stderr,
285 				    gettext("cat: close error: %s\n"),
286 				    strerror(errno));
287 			status = 2;
288 			continue;
289 		}
290 		ibsize = source.st_blksize;
291 
292 		/*
293 		 * If in visible mode and/or nflg, use vncat;
294 		 * otherwise, use cat.
295 		 */
296 
297 		if (visi_mode || nflg)
298 			estatus = vncat(fi);
299 		else
300 			estatus = cat(fi, &source, &target,
301 			    fi != stdin ? *argv : "standard input");
302 
303 		if (estatus)
304 			status = estatus;
305 
306 		/*
307 		 * If the input is not stdin, close the source file.
308 		 */
309 
310 		if (fi != stdin) {
311 			if (fclose(fi) != 0)
312 				if (!silent)
313 					(void) fprintf(stderr,
314 					    gettext("cat: close error: %s\n"),
315 					    strerror(errno));
316 		}
317 	}
318 
319 	/*
320 	 * Display any error with stdout operations.
321 	 */
322 
323 	if (fclose(stdout) != 0) {
324 		if (!silent)
325 			perror(gettext("cat: close error"));
326 		status = 2;
327 	}
328 	return (status);
329 }
330 
331 
332 
333 static int
334 cat(FILE *fi, struct stat *statp, struct stat *outp, char *filenm)
335 {
336 	int nitems;
337 	int nwritten;
338 	int offset;
339 	int fi_desc;
340 	long buffsize;
341 	char *bufferp;
342 	off_t mapsize, munmapsize;
343 	off_t filesize;
344 	off_t mapoffset;
345 
346 	fi_desc = fileno(fi);
347 	if (S_ISREG(statp->st_mode) && (lseek(fi_desc, (off_t)0, SEEK_CUR)
348 	    == 0) && (statp->st_size > SMALLFILESIZE)) {
349 		mapsize = (off_t)MAXMAPSIZE;
350 		if (statp->st_size < mapsize)
351 			mapsize = statp->st_size;
352 		munmapsize = mapsize;
353 
354 		/*
355 		 * Mmap time!
356 		 */
357 		bufferp = mmap((caddr_t)NULL, (size_t)mapsize, PROT_READ,
358 		    MAP_SHARED, fi_desc, (off_t)0);
359 		if (bufferp == (caddr_t)-1)
360 			mapsize = 0;	/* I guess we can't mmap today */
361 	} else
362 		mapsize = 0;		/* can't mmap non-regular files */
363 
364 	if (mapsize != 0) {
365 		int	read_error = 0;
366 		char	x;
367 
368 		/*
369 		 * NFS V2 will let root open a file it does not have permission
370 		 * to read. This read() is here to make sure that the access
371 		 * time on the input file will be updated. The VSC tests for
372 		 * cat do this:
373 		 *	cat file > /dev/null
374 		 * In this case the write()/mmap() pair will not read the file
375 		 * and the access time will not be updated.
376 		 */
377 
378 		if (read(fi_desc, &x, 1) == -1)
379 			read_error = 1;
380 		mapoffset = 0;
381 		filesize = statp->st_size;
382 		for (;;) {
383 			/*
384 			 * Note that on some systems (V7), very large writes to
385 			 * a pipe return less than the requested size of the
386 			 * write.  In this case, multiple writes are required.
387 			 */
388 			offset = 0;
389 			nitems = (int)mapsize;
390 			do {
391 				if ((nwritten = write(fileno(stdout),
392 				    &bufferp[offset], (size_t)nitems)) < 0) {
393 					if (!silent) {
394 						if (read_error == 1)
395 							(void) fprintf(
396 							    stderr, gettext(
397 							    "cat: cannot read "
398 							    "%s: "), filenm);
399 						else
400 							(void) fprintf(stderr,
401 							    gettext(
402 							    "cat: write "
403 							    "error: "));
404 						perror("");
405 					}
406 					(void) munmap(bufferp,
407 					    (size_t)munmapsize);
408 					(void) lseek(fi_desc, (off_t)mapoffset,
409 					    SEEK_SET);
410 					return (2);
411 				}
412 				offset += nwritten;
413 			} while ((nitems -= nwritten) > 0);
414 
415 			filesize -= mapsize;
416 			mapoffset += mapsize;
417 			if (filesize == 0)
418 				break;
419 			if (filesize < mapsize)
420 				mapsize = filesize;
421 			if (mmap(bufferp, (size_t)mapsize, PROT_READ,
422 			    MAP_SHARED|MAP_FIXED, fi_desc,
423 			    mapoffset) == (caddr_t)-1) {
424 				if (!silent)
425 					perror(gettext("cat: mmap error"));
426 				(void) munmap(bufferp, (size_t)munmapsize);
427 				(void) lseek(fi_desc, (off_t)mapoffset,
428 				    SEEK_SET);
429 				return (1);
430 			}
431 		}
432 		/*
433 		 * Move the file pointer past what we read. Shell scripts
434 		 * rely on cat to do this, so that successive commands in
435 		 * the script won't re-read the same data.
436 		 */
437 		(void) lseek(fi_desc, (off_t)mapoffset, SEEK_SET);
438 		(void) munmap(bufferp, (size_t)munmapsize);
439 	} else {
440 		if (S_ISREG(statp->st_mode) && S_ISREG(outp->st_mode)) {
441 			bufferp = (char *)buf;
442 			buffsize = SMALLFILESIZE;
443 		} else {
444 			if (obsize)
445 				/*
446 				 * common case, use output blksize
447 				 */
448 				buffsize = obsize;
449 			else if (ibsize)
450 				buffsize = ibsize;
451 			else
452 				buffsize = (long)BUFSIZ;
453 
454 			if (buffsize <= SMALLFILESIZE) {
455 				bufferp = (char *)buf;
456 			} else if ((bufferp =
457 			    malloc((size_t)buffsize)) == NULL) {
458 				perror(gettext("cat: no memory"));
459 				return (1);
460 			}
461 		}
462 
463 		/*
464 		 * While not end of file, copy blocks to stdout.
465 		 */
466 		while ((nitems = read(fi_desc, bufferp, (size_t)buffsize)) >
467 		    0) {
468 			offset = 0;
469 			/*
470 			 * Note that on some systems (V7), very large writes
471 			 * to a pipe return less than the requested size of
472 			 * the write.  In this case, multiple writes are
473 			 * required.
474 			 */
475 			do {
476 				nwritten = write(1, bufferp+offset,
477 				    (size_t)nitems);
478 				if (nwritten < 0) {
479 					if (!silent) {
480 						if (nwritten == -1)
481 							nwritten = 0l;
482 						(void) fprintf(stderr, gettext(\
483 "cat: output error (%d/%d characters written)\n"), nwritten, nitems);
484 						perror("");
485 					}
486 					if (bufferp != (char *)buf)
487 						free(bufferp);
488 					return (2);
489 				}
490 				offset += nwritten;
491 			} while ((nitems -= nwritten) > 0);
492 		}
493 		if (bufferp != (char *)buf)
494 			free(bufferp);
495 		if (nitems < 0) {
496 			(void) fprintf(stderr,
497 			    gettext("cat: input error on %s: "), filenm);
498 			perror("");
499 			return (1);
500 		}
501 	}
502 
503 	return (0);
504 }
505 
506 static int
507 vncat(fi)
508 	FILE *fi;
509 {
510 	int c;
511 	int	lno;
512 	int	boln;	/* = 1 if at beginning of line */
513 			/* = 0 otherwise */
514 	wchar_t	wc;
515 	int	len, n;
516 	unsigned char	*p1, *p2;
517 
518 	lno = 1;
519 	boln = 1;
520 	p1 = p2 = buf;
521 	for (;;) {
522 		if (p1 >= p2) {
523 			p1 = buf;
524 			if ((len = fread(p1, 1, BUFSIZ, fi)) <= 0)
525 				break;
526 			p2 = p1 + len;
527 		}
528 		c = *p1++;
529 
530 		/*
531 		 * Display newlines as "$<newline>"
532 		 * if visi_newline set
533 		 */
534 		if (c == '\n') {
535 			if (nflg && boln && !bflg)
536 				(void) printf("%6d\t", lno++);
537 			boln = 1;
538 
539 			if (visi_mode && visi_newline)
540 				(void) putchar('$');
541 			(void) putchar(c);
542 			continue;
543 		}
544 
545 		if (nflg && boln)
546 			(void) printf("%6d\t", lno++);
547 		boln = 0;
548 
549 		/*
550 		 * For non-printable and non-cntrl chars,
551 		 * use the "M-x" notation.
552 		 */
553 
554 		if (isascii(c)) {
555 			if (isprint(c) || visi_mode == 0) {
556 				(void) putchar(c);
557 				continue;
558 			}
559 
560 			/*
561 			 * For non-printable ascii characters.
562 			 */
563 
564 			if (iscntrl(c)) {
565 				/* For cntrl characters. */
566 				if ((c == '\t') || (c == '\f')) {
567 					/*
568 					 * Display tab as "^I" if visi_tab set
569 					 */
570 					if (visi_mode && visi_tab) {
571 						(void) putchar('^');
572 						(void) putchar(c^0100);
573 					} else
574 						(void) putchar(c);
575 					continue;
576 				}
577 				(void) putchar('^');
578 				(void) putchar(c^0100);
579 				continue;
580 			}
581 			continue;
582 		}
583 
584 		/*
585 		 * For non-ascii characters.
586 		 */
587 		p1--;
588 		if ((len = (p2 - p1)) < MB_LEN_MAX) {
589 			for (n = 0; n < len; n++)
590 				buf[n] = *p1++;
591 			p1 = buf;
592 			p2 = p1 + n;
593 			if ((len = fread(p2, 1, BUFSIZ - n, fi)) > 0)
594 				p2 += len;
595 		}
596 
597 		if ((len = (p2 - p1)) > MB_LEN_MAX)
598 			len = MB_LEN_MAX;
599 
600 		if ((len = mbtowc(&wc, (char *)p1, len)) > 0) {
601 			if (iswprint(wc) || visi_mode == 0) {
602 				(void) putwchar(wc);
603 				p1 += len;
604 				continue;
605 			}
606 		}
607 
608 		(void) putchar('M');
609 		(void) putchar('-');
610 		c -= 0200;
611 
612 		if (isprint(c)) {
613 			(void) putchar(c);
614 		}
615 
616 		/* For non-printable characters. */
617 		if (iscntrl(c)) {
618 			/* For cntrl characters. */
619 			if ((c == '\t') || (c == '\f')) {
620 				/*
621 				 * Display tab as "^I" if visi_tab set
622 				 */
623 				if (visi_mode && visi_tab) {
624 					(void) putchar('^');
625 					(void) putchar(c^0100);
626 				} else
627 					(void) putchar(c);
628 			} else {
629 				(void) putchar('^');
630 				(void) putchar(c^0100);
631 			}
632 		}
633 		p1++;
634 	}
635 	return (0);
636 }
637