xref: /titanic_51/usr/src/cmd/sunpc/other/dos2unix.c (revision bdfc6d18da790deeec2e0eb09c625902defe2498)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 1999-2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  *	Converts files from one char set to another
31  *
32  *	Written 11/09/87	Eddy Bell
33  *
34  */
35 
36 
/*
 *  INCLUDES AND DEFINES
 */
40 #include	<stdio.h>
41 #include	<fcntl.h>
42 #include	<sys/systeminfo.h>
43 #include	<stdlib.h>
44 #include	<string.h>
45 #include	<errno.h>
46 
47 /*#include	<io.h>			for microsoft c 4.0 */
48 
/*
 * Translation modes selected on the command line:
 *   CONTENTS_ASCII  - "-7": strip CRs, replace bytes above 127 with a space.
 *   CONTENTS_ASCII8 - not referenced anywhere in this file.
 *   CONTENTS_ISO    - "-iso" (the default): strip CRs and map every byte
 *                     through the selected DOS-to-ISO table.
 *   CONTENTS_DOS    - "-ascii": strip CRs only.
 */
#define	CONTENTS_ASCII	0
#define	CONTENTS_ASCII8	1
#define	CONTENTS_ISO	2
#define	CONTENTS_DOS	3

/*
 * Exactly one of UNIX_BUILD / DOS_BUILD gets defined, keyed on whether
 * _F_BIN is already defined when this point is reached.
 */
#ifndef _F_BIN
#define	UNIX_BUILD	1
#else
#define	DOS_BUILD	1
#endif
58 
59 /******************************************************************************
60  * INCLUDES AND DEFINES
61  ******************************************************************************/
62 #ifdef UNIX_BUILD
63 #include <sys/types.h>
64 #include	<sys/kbio.h>
65 #include	<sys/time.h>
66 #include	<fcntl.h>
67 #include "../sys/dos_iso.h"
68 #endif
69 
70 #ifdef DOS_BUILD
71 #include <dos.h>
72 #include "..\sys\dos_iso.h"
73 #endif
74 
75 
/* Linkage and pseudo-type shorthands (none are referenced in this file). */
#define	GLOBAL
#define	LOCAL	static
#define	VOID	int
#define	BOOL	int

/*
 * Boolean values.  TRUE is "all bits set"; the expansion is parenthesized
 * so that it always behaves as a single operand wherever it is used.
 */
#define	FALSE	0
#define	TRUE	(~FALSE)

/* Characters with special meaning in DOS text files. */
#define	CR	0x0D	/* carriage return */
#define	LF	0x0A	/* line feed */
#define	DOS_EOF	0x1A	/* Ctrl-Z end-of-file marker */

/* Size of the buffer handed to sysinfo(). */
#define	MAXLEN	1024
88 
89 
90 /******************************************************************************
91  * FUNCTION AND VARIABLE DECLARATIONS
92  ******************************************************************************/
/*
 * Real prototypes (rather than empty parameter lists) so the compiler can
 * check the arguments of every error()/usage() call.
 */
static void	error(char *format, char *args);
static void	usage(void);

/* Descriptor returned by mkstemp() for the temporary output file; -1 if none. */
static int	tmpfd = -1;
96 
97 /******************************************************************************
98 * ENTRY POINTS
99  ******************************************************************************/
100 
101 
102 void	main(argc, argv)
103 int	argc;
104 char	**argv;
105 {
106    FILE *in_stream = NULL;
107    FILE *out_stream = NULL;
108 	unsigned char tmp_buff[512];
109 	unsigned char *src_str, *dest_str;
110 	char	 *in_file_name, *out_file_name;
111    int num_read, i, j, out_len, translate_mode, same_name;			       /* char count for fread() */
112    unsigned char * dos_to_iso;
113 	int	type;
114 	int	code_page_overide; /* over ride of default codepage */
115 #ifdef UNIX_BUILD
116 	int	kbdfd;
117 #endif
118 	char	sysinfo_str[MAXLEN];
119 
120 	same_name = FALSE;
121 	out_file_name = (char *)0;
122 
123     /*	The filename parameter is positionally dependent - it must be the
124      *	second argument, immediately following the program name. Except
125      *	when a char set switch is passed then the file name must be third
126      *	argument.
127      */
128 
129 	argv++;
130 	in_stream = stdin;
131 	out_stream = stdout;
132 	j = 0;  /* count for file names 0 -> source 1-> dest */
133 	translate_mode = CONTENTS_ISO; /*default trans mode*/
134 	code_page_overide = 0;
135 	for (i=1; i<argc; i++) {
136       		if (*argv[0] == '-') {
137 			if (argc > 1 && !strncmp(*argv,"-iso",4)) {
138 				translate_mode = CONTENTS_ISO;
139 				argv++;
140 			} else if (argc > 1 && !strncmp(*argv,"-7",2)) {
141 				translate_mode = CONTENTS_ASCII;
142 				argv++;
143 			} else if (argc > 1 && !strncmp(*argv,"-ascii",6)) {
144 				translate_mode = CONTENTS_DOS;
145 				argv++;
146 			} else if (argc > 1 && !strncmp(*argv,"-437",4)) {
147 				code_page_overide = CODE_PAGE_US;
148 				argv++;
149 			} else if (argc > 1 && !strncmp(*argv,"-850",4)) {
150 				code_page_overide = CODE_PAGE_MULTILINGUAL;
151 				argv++;
152 			} else if (argc > 1 && !strncmp(*argv,"-860",4)) {
153 				code_page_overide = CODE_PAGE_PORTUGAL;
154 				argv++;
155 			} else if (argc > 1 && !strncmp(*argv,"-863",4)) {
156 				code_page_overide = CODE_PAGE_CANADA_FRENCH;
157 				argv++;
158 			} else if (argc > 1 && !strncmp(*argv,"-865",4)) {
159 				code_page_overide = CODE_PAGE_NORWAY;
160 				argv++;
161 			} else
162 				argv++;
163 			continue;
164 		}else{  /* not a command so must be filename */
165 			switch(j){
166 				case IN_FILE:	/* open in file from cmdline */
167 		       			in_file_name = *argv;
168 		       			j++;  /* next file name is outfile */
169 			       	break;
170 
171 				case OUT_FILE:	/* open out file from cmdline */
172 					out_file_name = *argv;
173 					j++;
174 			   	break;
175 
176 				default:
177 					usage();
178 			}
179 		}
180 
181 
182 	argv++;
183 	}
184 
185 	/* input file is specified */
186 	if (j > 0) {
187 		in_stream = fopen(in_file_name, "r");
188 		if (in_stream == NULL)
189 			error("Couldn't open input file %s.", in_file_name);
190 	}
191 
192 	/* output file is secified */
193 	if (j > 1) {
194 		if(!strcmp(in_file_name, out_file_name)){
195 			/* input and output have same name */
196 			if (access(out_file_name, 2))
197 				error("%s not writable.", out_file_name);
198 			strcpy(out_file_name, "/tmp/udXXXXXX");
199 			tmpfd = mkstemp(out_file_name);
200 			if (tmpfd == -1) {
201 				error("Couldn't create output file %s.",
202 				    out_file_name);
203 			}
204 			(void) close(tmpfd);
205 			same_name = TRUE;
206 		} else
207 			same_name = FALSE;
208 		out_stream = fopen(out_file_name, "w");
209 		if (out_stream == NULL) {
210 			(void) unlink(out_file_name);
211 			error("Couldn't open output file %s.", out_file_name);
212 		}
213 	}
214 
215 #ifdef _F_BIN
216 	setmode(fileno(in_stream), O_BINARY);
217 	setmode(fileno(out_stream), O_BINARY);
218 #endif
219 
220 #ifdef UNIX_BUILD
221 	if(!code_page_overide){
222 		if (sysinfo(SI_ARCHITECTURE,sysinfo_str,MAXLEN)  < 0) {
223 			fprintf(stderr,"could not obtain system information\n");
224 			(void) unlink(out_file_name);
225 			exit(1);
226 
227 		}
228 		if (strcmp(sysinfo_str,"i386")) {
229 			if ((kbdfd = open("/dev/kbd", O_WRONLY)) < 0) {
230 				fprintf(stderr, "could not open /dev/kbd to "
231 				    "get keyboard type US keyboard assumed\n");
232 			}
233 			if (ioctl(kbdfd, KIOCLAYOUT, &type) < 0) {
234 				fprintf(stderr,"could not get keyboard type US keyboard assumed\n");
235 			}
236 		} else {
237 			type = 0;
238 		}
239 		switch(type){
240 			case	0:
241 			case	1:	/* United States */
242 				dos_to_iso = &dos_to_iso_cp_437[0];
243 			break;
244 
245 			case	2:	/* Belgian French */
246 				dos_to_iso = &dos_to_iso_cp_437[0];
247 			break;
248 
249 			case	3:	/* Canadian French */
250 				dos_to_iso = &dos_to_iso_cp_863[0];
251 			break;
252 
253 			case	4:	/* Danish */
254 				dos_to_iso = &dos_to_iso_cp_865[0];
255 			break;
256 
257 			case	5:	/* German */
258 				dos_to_iso = &dos_to_iso_cp_437[0];
259 			break;
260 
261 			case	6:	/* Italian */
262 				dos_to_iso = &dos_to_iso_cp_437[0];
263 			break;
264 
265 			case	7:	/* Netherlands Dutch */
266 				dos_to_iso = &dos_to_iso_cp_437[0];
267 			break;
268 
269 			case	8:	/* Norwegian */
270 				dos_to_iso = &dos_to_iso_cp_865[0];
271 			break;
272 
273 			case	9:	/* Portuguese */
274 				dos_to_iso = &dos_to_iso_cp_860[0];
275 			break;
276 
277 			case	10:	/* Spanish */
278 				dos_to_iso = &dos_to_iso_cp_437[0];
279 			break;
280 
281 			case	11:	/* Swedish Finnish */
282 				dos_to_iso = &dos_to_iso_cp_437[0];
283 			break;
284 
285 			case	12:	/* Swiss French */
286 				dos_to_iso = &dos_to_iso_cp_437[0];
287 			break;
288 
289 			case	13:	/* Swiss German */
290 				dos_to_iso = &dos_to_iso_cp_437[0];
291 			break;
292 
293 			case	14:	/* United Kingdom */
294 				dos_to_iso = &dos_to_iso_cp_437[0];
295 
296 			break;
297 
298 			default:
299 				dos_to_iso = &dos_to_iso_cp_437[0];
300 			break;
301 		}
302 	}else{
303 		switch(code_page_overide){
304 			case CODE_PAGE_US:
305 				dos_to_iso = &dos_to_iso_cp_437[0];
306 			break;
307 
308 			case CODE_PAGE_MULTILINGUAL:
309 				dos_to_iso = &dos_to_iso_cp_850[0];
310 			break;
311 
312 			case CODE_PAGE_PORTUGAL:
313 				dos_to_iso = &dos_to_iso_cp_860[0];
314 			break;
315 
316 			case CODE_PAGE_CANADA_FRENCH:
317 				dos_to_iso = &dos_to_iso_cp_863[0];
318 			break;
319 
320 			case CODE_PAGE_NORWAY:
321 				dos_to_iso = &dos_to_iso_cp_865[0];
322 			break;
323 		}
324 	}
325 
326 #endif
327 #ifdef DOS_BUILD
328 	if(!code_page_overide){
329 		{
330 		union REGS regs;
331 		regs.h.ah = 0x66;	/* get/set global code page */
332 		regs.h.al = 0x01;		/* get */
333 		intdos(&regs, &regs);
334 		type = regs.x.bx;
335 		}
336 		switch(type){
337 			case	437:	/* United States */
338 				dos_to_iso = &dos_to_iso_cp_437[0];
339 			break;
340 
341 			case	850:	/* Multilingual */
342 				dos_to_iso = &dos_to_iso_cp_850[0];
343 			break;
344 
345 			case	860:	/* Portuguese */
346 				dos_to_iso = &dos_to_iso_cp_860[0];
347 			break;
348 
349 			case	863:	/* Canadian French */
350 				dos_to_iso = &dos_to_iso_cp_863[0];
351 			break;
352 
353 			case	865:	/* Danish */
354 				dos_to_iso = &dos_to_iso_cp_865[0];
355 			break;
356 
357 			default:
358 				dos_to_iso = &dos_to_iso_cp_437[0];
359 			break;
360 		}
361 	}else{
362 		switch(code_page_overide){
363 			case CODE_PAGE_US:
364 				dos_to_iso = &dos_to_iso_cp_437[0];
365 			break;
366 
367 			case CODE_PAGE_MULTILINGUAL:
368 				dos_to_iso = &dos_to_iso_cp_850[0];
369 			break;
370 
371 			case CODE_PAGE_PORTUGAL:
372 				dos_to_iso = &dos_to_iso_cp_860[0];
373 			break;
374 
375 			case CODE_PAGE_CANADA_FRENCH:
376 				dos_to_iso = &dos_to_iso_cp_863[0];
377 			break;
378 
379 			case CODE_PAGE_NORWAY:
380 				dos_to_iso = &dos_to_iso_cp_865[0];
381 			break;
382 		}
383 	}
384 
385 
386 #endif
387 
388     /*	While not EOF, read in chars and send them to out_stream
389      *	if current char is not a CR.
390      */
391 
392     do {
393 		num_read = fread(&tmp_buff[0], 1, 100, in_stream);
394 		i = 0;
395 		out_len = 0;
396 		src_str = dest_str = &tmp_buff[0];
397 		switch (translate_mode){
398 			case CONTENTS_ISO:
399 				{
400 				while ( i++ != num_read ){
401 					if( *src_str == '\r'){
402 						src_str++;
403 						}
404 					else{
405 						out_len++;
406 						*dest_str++ = dos_to_iso[*src_str++];
407 						}
408 					}
409 				}
410 				break;
411 
412 			case CONTENTS_ASCII:
413 				{
414 				while ( i++ != num_read){
415 					if( *src_str == '\r'){
416 						src_str++;
417 						continue;
418 						}
419 					else if ( *src_str > 127 ){
420 						*dest_str++ = (unsigned char) ' ';
421 						src_str++;
422 						out_len++;
423 						}
424 					else{
425 						out_len++;
426 						*dest_str++ = *src_str++;
427 						}
428 					}
429 				}
430 				break;
431 
432 			case CONTENTS_DOS:
433 				{
434 				while ( i++ != num_read){
435 					if( *src_str == '\r'){
436 						src_str++;
437 						continue;
438 						}
439 						*dest_str++ =	*src_str++;
440 						out_len++;
441 					}
442 				}
443 				break;
444 			}
445 		if (out_len > num_read)
446 			out_len = num_read;
447 		if (tmp_buff[out_len-2] == DOS_EOF)
448 			out_len -= 2;
449 		else if (tmp_buff[out_len-1] == DOS_EOF)
450 			out_len -= 1;
451 
452 		if( out_len > 0 &&
453 		    out_len != (i= fwrite(&tmp_buff[0], 1, out_len, out_stream)))
454 			error("Error writing %s.", out_file_name);
455 
456 		} while (!feof(in_stream));
457 
458 	fclose(out_stream);
459 	fclose(in_stream);
460 	if(same_name){
461 		unlink(in_file_name);
462 		in_stream = fopen(out_file_name, "r");
463 		out_stream = fopen(in_file_name, "w");
464 #ifdef _F_BIN
465 		setmode(fileno(in_stream), O_BINARY);
466 		setmode(fileno(out_stream), O_BINARY);
467 #endif
468 		while ((num_read = (unsigned)fread(tmp_buff, 1, sizeof tmp_buff, in_stream)) != 0) {
469 		   if( num_read != fwrite(tmp_buff, 1, num_read, out_stream))
470 			error("Error writing %s.", in_file_name);
471 		}
472 		fclose(out_stream);
473 		fclose(in_stream);
474 		unlink(out_file_name);
475 	}
476 	exit(0);
477 }
478 
/*
 * Report a fatal error and terminate.
 *
 * Writes "dos2unix: ", then the message built from format and args,
 * then the strerror() text for the errno value in effect when error()
 * was called, to stderr, and exits with status 1.
 *
 *	format	printf-style format taking at most one %s conversion
 *	args	string substituted for that %s
 */
static void
error(char *format, char *args)
{
	/* capture errno first: the fprintf() calls below may overwrite it */
	int saved_errno = errno;

	fprintf(stderr, "dos2unix: ");
	fprintf(stderr, format, args);
	fprintf(stderr, "  %s.\n", strerror(saved_errno));
	exit(1);
}
488 
/*
 * Print the command synopsis to stderr and exit with status 1.
 * The synopsis lists the code page switches that main() parses
 * (-437, -850, -860, -863, -865) alongside the mode switches.
 */
static void
usage(void)
{
	fprintf(stderr, "usage: dos2unix [ -ascii ] [ -iso ] [ -7 ] "
	    "[ -437 | -850 | -860 | -863 | -865 ] "
	    "[ originalfile [ convertedfile ] ]\n");
	exit(1);
}
494 
495