xref: /illumos-gate/usr/src/cmd/sunpc/other/dos2unix.c (revision 20a7641f9918de8574b8b3b47dbe35c4bfc78df1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  *	Converts files from one char set to another
29  *
30  *	Written 11/09/87	Eddy Bell
31  *
32  */
33 
34 
35 /*
 *  INCLUDES and DEFINES
37  */
38 #include	<stdio.h>
39 #include	<fcntl.h>
40 #include	<sys/systeminfo.h>
41 #include	<stdlib.h>
42 #include	<string.h>
43 #include	<errno.h>
44 
45 /*#include	<io.h>			for microsoft c 4.0 */
46 
47 #define 	CONTENTS_ASCII	0
48 #define 	CONTENTS_ASCII8 1
49 #define 	CONTENTS_ISO	2
50 #define 	CONTENTS_DOS	3
51 #ifdef _F_BIN
52 #define DOS_BUILD 1
53 #else
54 #define UNIX_BUILD 1
55 #endif
56 
57 /******************************************************************************
58  * INCLUDES AND DEFINES
59  ******************************************************************************/
60 #ifdef UNIX_BUILD
#include <sys/types.h>
#include	<sys/kbio.h>
#include	<sys/time.h>
#include	<fcntl.h>
#include	<unistd.h>
#include "../sys/dos_iso.h"
66 #endif
67 
68 #ifdef DOS_BUILD
69 #include <dos.h>
70 #include "..\sys\dos_iso.h"
71 #endif
72 
73 
74 #define 	GLOBAL
75 #define 	LOCAL	static
76 #define 	VOID	int
77 #define 	BOOL	int
78 
79 #define 	FALSE	0
80 #define 	TRUE	~FALSE
81 
82 #define 	CR	0x0D
83 #define 	LF	0x0A
84 #define 	DOS_EOF 0x1A
85 #define		MAXLEN	1024
86 
87 
88 /******************************************************************************
89  * FUNCTION AND VARIABLE DECLARATIONS
90  ******************************************************************************/
/* Print a "dos2unix: ..." diagnostic followed by strerror(errno), then exit. */
static	void	error(char *format, char *args);
/* Print the command-line synopsis to stderr, then exit. */
static	void	usage(void);
/* Descriptor returned by mkstemp() for the temporary output file; -1 if none. */
static int	tmpfd = -1;
94 
95 /******************************************************************************
96 * ENTRY POINTS
97  ******************************************************************************/
98 
99 int
100 main(int argc, char **argv)
101 {
102    FILE *in_stream = NULL;
103    FILE *out_stream = NULL;
104 	unsigned char tmp_buff[512];
105 	unsigned char *src_str, *dest_str;
106 	char	 *in_file_name, *out_file_name;
107    int num_read, i, j, out_len, translate_mode, same_name;			       /* char count for fread() */
108    unsigned char * dos_to_iso;
109 	int	type;
110 	int	code_page_overide; /* over ride of default codepage */
111 #ifdef UNIX_BUILD
112 	int	kbdfd;
113 #endif
114 	char	sysinfo_str[MAXLEN];
115 
116 	same_name = FALSE;
117 	out_file_name = (char *)0;
118 
119     /*	The filename parameter is positionally dependent - it must be the
120      *	second argument, immediately following the program name. Except
121      *	when a char set switch is passed then the file name must be third
122      *	argument.
123      */
124 
125 	argv++;
126 	in_stream = stdin;
127 	out_stream = stdout;
128 	j = 0;  /* count for file names 0 -> source 1-> dest */
129 	translate_mode = CONTENTS_ISO; /*default trans mode*/
130 	code_page_overide = 0;
131 	for (i=1; i<argc; i++) {
132       		if (*argv[0] == '-') {
133 			if (argc > 1 && !strncmp(*argv,"-iso",4)) {
134 				translate_mode = CONTENTS_ISO;
135 				argv++;
136 			} else if (argc > 1 && !strncmp(*argv,"-7",2)) {
137 				translate_mode = CONTENTS_ASCII;
138 				argv++;
139 			} else if (argc > 1 && !strncmp(*argv,"-ascii",6)) {
140 				translate_mode = CONTENTS_DOS;
141 				argv++;
142 			} else if (argc > 1 && !strncmp(*argv,"-437",4)) {
143 				code_page_overide = CODE_PAGE_US;
144 				argv++;
145 			} else if (argc > 1 && !strncmp(*argv,"-850",4)) {
146 				code_page_overide = CODE_PAGE_MULTILINGUAL;
147 				argv++;
148 			} else if (argc > 1 && !strncmp(*argv,"-860",4)) {
149 				code_page_overide = CODE_PAGE_PORTUGAL;
150 				argv++;
151 			} else if (argc > 1 && !strncmp(*argv,"-863",4)) {
152 				code_page_overide = CODE_PAGE_CANADA_FRENCH;
153 				argv++;
154 			} else if (argc > 1 && !strncmp(*argv,"-865",4)) {
155 				code_page_overide = CODE_PAGE_NORWAY;
156 				argv++;
157 			} else
158 				argv++;
159 			continue;
160 		}else{  /* not a command so must be filename */
161 			switch(j){
162 				case IN_FILE:	/* open in file from cmdline */
163 		       			in_file_name = *argv;
164 		       			j++;  /* next file name is outfile */
165 			       	break;
166 
167 				case OUT_FILE:	/* open out file from cmdline */
168 					out_file_name = *argv;
169 					j++;
170 			   	break;
171 
172 				default:
173 					usage();
174 			}
175 		}
176 
177 
178 	argv++;
179 	}
180 
181 	/* input file is specified */
182 	if (j > 0) {
183 		in_stream = fopen(in_file_name, "r");
184 		if (in_stream == NULL)
185 			error("Couldn't open input file %s.", in_file_name);
186 	}
187 
188 	/* output file is secified */
189 	if (j > 1) {
190 		if(!strcmp(in_file_name, out_file_name)){
191 			/* input and output have same name */
192 			if (access(out_file_name, 2))
193 				error("%s not writable.", out_file_name);
194 			strcpy(out_file_name, "/tmp/udXXXXXX");
195 			tmpfd = mkstemp(out_file_name);
196 			if (tmpfd == -1) {
197 				error("Couldn't create output file %s.",
198 				    out_file_name);
199 			}
200 			(void) close(tmpfd);
201 			same_name = TRUE;
202 		} else
203 			same_name = FALSE;
204 		out_stream = fopen(out_file_name, "w");
205 		if (out_stream == NULL) {
206 			(void) unlink(out_file_name);
207 			error("Couldn't open output file %s.", out_file_name);
208 		}
209 	}
210 
211 #ifdef _F_BIN
212 	setmode(fileno(in_stream), O_BINARY);
213 	setmode(fileno(out_stream), O_BINARY);
214 #endif
215 
216 #ifdef UNIX_BUILD
217 	if(!code_page_overide){
218 		if (sysinfo(SI_ARCHITECTURE,sysinfo_str,MAXLEN)  < 0) {
219 			fprintf(stderr,"could not obtain system information\n");
220 			(void) unlink(out_file_name);
221 			exit(1);
222 
223 		}
224 		if (strcmp(sysinfo_str,"i386")) {
225 			if ((kbdfd = open("/dev/kbd", O_WRONLY)) < 0) {
226 				fprintf(stderr, "could not open /dev/kbd to "
227 				    "get keyboard type US keyboard assumed\n");
228 			}
229 			if (ioctl(kbdfd, KIOCLAYOUT, &type) < 0) {
230 				fprintf(stderr,"could not get keyboard type US keyboard assumed\n");
231 			}
232 		} else {
233 			type = 0;
234 		}
235 		switch(type){
236 			case	0:
237 			case	1:	/* United States */
238 				dos_to_iso = &dos_to_iso_cp_437[0];
239 			break;
240 
241 			case	2:	/* Belgian French */
242 				dos_to_iso = &dos_to_iso_cp_437[0];
243 			break;
244 
245 			case	3:	/* Canadian French */
246 				dos_to_iso = &dos_to_iso_cp_863[0];
247 			break;
248 
249 			case	4:	/* Danish */
250 				dos_to_iso = &dos_to_iso_cp_865[0];
251 			break;
252 
253 			case	5:	/* German */
254 				dos_to_iso = &dos_to_iso_cp_437[0];
255 			break;
256 
257 			case	6:	/* Italian */
258 				dos_to_iso = &dos_to_iso_cp_437[0];
259 			break;
260 
261 			case	7:	/* Netherlands Dutch */
262 				dos_to_iso = &dos_to_iso_cp_437[0];
263 			break;
264 
265 			case	8:	/* Norwegian */
266 				dos_to_iso = &dos_to_iso_cp_865[0];
267 			break;
268 
269 			case	9:	/* Portuguese */
270 				dos_to_iso = &dos_to_iso_cp_860[0];
271 			break;
272 
273 			case	10:	/* Spanish */
274 				dos_to_iso = &dos_to_iso_cp_437[0];
275 			break;
276 
277 			case	11:	/* Swedish Finnish */
278 				dos_to_iso = &dos_to_iso_cp_437[0];
279 			break;
280 
281 			case	12:	/* Swiss French */
282 				dos_to_iso = &dos_to_iso_cp_437[0];
283 			break;
284 
285 			case	13:	/* Swiss German */
286 				dos_to_iso = &dos_to_iso_cp_437[0];
287 			break;
288 
289 			case	14:	/* United Kingdom */
290 				dos_to_iso = &dos_to_iso_cp_437[0];
291 
292 			break;
293 
294 			default:
295 				dos_to_iso = &dos_to_iso_cp_437[0];
296 			break;
297 		}
298 	}else{
299 		switch(code_page_overide){
300 			case CODE_PAGE_US:
301 				dos_to_iso = &dos_to_iso_cp_437[0];
302 			break;
303 
304 			case CODE_PAGE_MULTILINGUAL:
305 				dos_to_iso = &dos_to_iso_cp_850[0];
306 			break;
307 
308 			case CODE_PAGE_PORTUGAL:
309 				dos_to_iso = &dos_to_iso_cp_860[0];
310 			break;
311 
312 			case CODE_PAGE_CANADA_FRENCH:
313 				dos_to_iso = &dos_to_iso_cp_863[0];
314 			break;
315 
316 			case CODE_PAGE_NORWAY:
317 				dos_to_iso = &dos_to_iso_cp_865[0];
318 			break;
319 		}
320 	}
321 
322 #endif
323 #ifdef DOS_BUILD
324 	if(!code_page_overide){
325 		{
326 		union REGS regs;
327 		regs.h.ah = 0x66;	/* get/set global code page */
328 		regs.h.al = 0x01;		/* get */
329 		intdos(&regs, &regs);
330 		type = regs.x.bx;
331 		}
332 		switch(type){
333 			case	437:	/* United States */
334 				dos_to_iso = &dos_to_iso_cp_437[0];
335 			break;
336 
337 			case	850:	/* Multilingual */
338 				dos_to_iso = &dos_to_iso_cp_850[0];
339 			break;
340 
341 			case	860:	/* Portuguese */
342 				dos_to_iso = &dos_to_iso_cp_860[0];
343 			break;
344 
345 			case	863:	/* Canadian French */
346 				dos_to_iso = &dos_to_iso_cp_863[0];
347 			break;
348 
349 			case	865:	/* Danish */
350 				dos_to_iso = &dos_to_iso_cp_865[0];
351 			break;
352 
353 			default:
354 				dos_to_iso = &dos_to_iso_cp_437[0];
355 			break;
356 		}
357 	}else{
358 		switch(code_page_overide){
359 			case CODE_PAGE_US:
360 				dos_to_iso = &dos_to_iso_cp_437[0];
361 			break;
362 
363 			case CODE_PAGE_MULTILINGUAL:
364 				dos_to_iso = &dos_to_iso_cp_850[0];
365 			break;
366 
367 			case CODE_PAGE_PORTUGAL:
368 				dos_to_iso = &dos_to_iso_cp_860[0];
369 			break;
370 
371 			case CODE_PAGE_CANADA_FRENCH:
372 				dos_to_iso = &dos_to_iso_cp_863[0];
373 			break;
374 
375 			case CODE_PAGE_NORWAY:
376 				dos_to_iso = &dos_to_iso_cp_865[0];
377 			break;
378 		}
379 	}
380 
381 
382 #endif
383 
384     /*	While not EOF, read in chars and send them to out_stream
385      *	if current char is not a CR.
386      */
387 
388     do {
389 		num_read = fread(&tmp_buff[0], 1, 100, in_stream);
390 		i = 0;
391 		out_len = 0;
392 		src_str = dest_str = &tmp_buff[0];
393 		switch (translate_mode){
394 			case CONTENTS_ISO:
395 				{
396 				while ( i++ != num_read ){
397 					if( *src_str == '\r'){
398 						src_str++;
399 						}
400 					else{
401 						out_len++;
402 						*dest_str++ = dos_to_iso[*src_str++];
403 						}
404 					}
405 				}
406 				break;
407 
408 			case CONTENTS_ASCII:
409 				{
410 				while ( i++ != num_read){
411 					if( *src_str == '\r'){
412 						src_str++;
413 						continue;
414 						}
415 					else if ( *src_str > 127 ){
416 						*dest_str++ = (unsigned char) ' ';
417 						src_str++;
418 						out_len++;
419 						}
420 					else{
421 						out_len++;
422 						*dest_str++ = *src_str++;
423 						}
424 					}
425 				}
426 				break;
427 
428 			case CONTENTS_DOS:
429 				{
430 				while ( i++ != num_read){
431 					if( *src_str == '\r'){
432 						src_str++;
433 						continue;
434 						}
435 						*dest_str++ =	*src_str++;
436 						out_len++;
437 					}
438 				}
439 				break;
440 			}
441 		if (out_len > num_read)
442 			out_len = num_read;
443 		if (tmp_buff[out_len-2] == DOS_EOF)
444 			out_len -= 2;
445 		else if (tmp_buff[out_len-1] == DOS_EOF)
446 			out_len -= 1;
447 
448 		if( out_len > 0 &&
449 		    out_len != (i= fwrite(&tmp_buff[0], 1, out_len, out_stream)))
450 			error("Error writing %s.", out_file_name);
451 
452 		} while (!feof(in_stream));
453 
454 	fclose(out_stream);
455 	fclose(in_stream);
456 	if(same_name){
457 		unlink(in_file_name);
458 		in_stream = fopen(out_file_name, "r");
459 		out_stream = fopen(in_file_name, "w");
460 #ifdef _F_BIN
461 		setmode(fileno(in_stream), O_BINARY);
462 		setmode(fileno(out_stream), O_BINARY);
463 #endif
464 		while ((num_read = (unsigned)fread(tmp_buff, 1, sizeof tmp_buff, in_stream)) != 0) {
465 		   if( num_read != fwrite(tmp_buff, 1, num_read, out_stream))
466 			error("Error writing %s.", in_file_name);
467 		}
468 		fclose(out_stream);
469 		fclose(in_stream);
470 		unlink(out_file_name);
471 	}
472 	return (0);
473 }
474 
/*
 * Report a fatal error and terminate with exit status 1.
 *
 * format is a printf-style format containing at most one %s conversion,
 * which is filled in with args.  The message is written to stderr as
 * "dos2unix: <message>  <strerror(errno)>.".
 *
 * errno is captured on entry because the fprintf() calls below may change
 * it before it is turned into text.  A NULL args is printed as "(null)"
 * rather than being handed to %s.
 */
static void
error(char *format, char *args)
{
	int saved_errno = errno;

	fprintf(stderr, "dos2unix: ");
	fprintf(stderr, format, (args != NULL) ? args : "(null)");
	fprintf(stderr, "  %s.\n", strerror(saved_errno));
	exit(1);
}
484 
/*
 * Print the command-line synopsis to stderr and terminate with exit
 * status 1.  The synopsis lists every switch the argument parser accepts,
 * including the code page selectors.
 */
static void
usage(void)
{
	fprintf(stderr, "usage: dos2unix [ -ascii ] [ -iso ] [ -7 ] "
	    "[ -437 | -850 | -860 | -863 | -865 ] "
	    "[ originalfile [ convertedfile ] ]\n");
	exit(1);
}
490 
491