xref: /illumos-gate/usr/src/cmd/sunpc/other/dos2unix.c (revision a629ded1d7b2e67c2028ccbc5ba9099328cc4e1b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2003 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  *	Converts files from one char set to another
31  *
32  *	Written 11/09/87	Eddy Bell
33  *
34  */
35 
36 
/*
 *  INCLUDES and DEFINES
 */
40 #include	<stdio.h>
41 #include	<fcntl.h>
42 #include	<sys/systeminfo.h>
43 #include	<stdlib.h>
44 #include	<string.h>
45 #include	<errno.h>
46 
47 /*#include	<io.h>			for microsoft c 4.0 */
48 
/*
 * Translation modes.  main() starts in CONTENTS_ISO; "-7" selects
 * CONTENTS_ASCII and "-ascii" selects CONTENTS_DOS.  CONTENTS_ASCII8 is
 * defined but not selected by any switch in this file.
 */
#define	CONTENTS_ASCII	0	/* bytes above 127 become a space */
#define	CONTENTS_ASCII8	1
#define	CONTENTS_ISO	2	/* bytes mapped through a dos_to_iso table */
#define	CONTENTS_DOS	3	/* bytes copied unchanged */

/*
 * Build flavour: DOS when _F_BIN is defined, UNIX otherwise.
 * NOTE(review): _F_BIN is presumably supplied by the DOS compiler's
 * headers -- confirm.
 */
#if defined(_F_BIN)
#define	DOS_BUILD	1
#else
#define	UNIX_BUILD	1
#endif
58 
59 /******************************************************************************
60  * INCLUDES AND DEFINES
61  ******************************************************************************/
62 #ifdef UNIX_BUILD
63 #include <sys/types.h>
64 #include	<sys/kbio.h>
65 #include	<sys/time.h>
66 #include	<fcntl.h>
67 #include "../sys/dos_iso.h"
68 #endif
69 
70 #ifdef DOS_BUILD
71 #include <dos.h>
72 #include "..\sys\dos_iso.h"
73 #endif
74 
75 
/* Linkage and type shorthands: GLOBAL expands to nothing, LOCAL to static. */
#define	GLOBAL
#define	LOCAL	static
#define	VOID	int
#define	BOOL	int

/*
 * Boolean values.  TRUE is the bitwise complement of FALSE (all bits set);
 * it is parenthesized so it stays a single operand wherever it is expanded.
 */
#define	FALSE	0
#define	TRUE	(~FALSE)

#define	CR	0x0D	/* carriage return */
#define	LF	0x0A	/* line feed */
#define	DOS_EOF	0x1A	/* Ctrl-Z; trimmed from the end of each chunk */
#define	MAXLEN	1024	/* size of the sysinfo() result buffer */
88 
89 
90 /******************************************************************************
91  * FUNCTION AND VARIABLE DECLARATIONS
92  ******************************************************************************/
/*
 * error() prints "dos2unix: ", the formatted message and the strerror()
 * text for errno on stderr, then exits with status 1.  usage() prints the
 * command synopsis on stderr and exits with status 1.  Both are declared
 * with full prototypes so that calls are argument-checked.
 */
static	void	error(char *format, char *args);
static	void	usage(void);
/* Descriptor returned by mkstemp() for the scratch output file; -1 if none. */
static int	tmpfd = -1;
96 
97 /******************************************************************************
98 * ENTRY POINTS
99  ******************************************************************************/
100 
101 int
102 main(int argc, char **argv)
103 {
104    FILE *in_stream = NULL;
105    FILE *out_stream = NULL;
106 	unsigned char tmp_buff[512];
107 	unsigned char *src_str, *dest_str;
108 	char	 *in_file_name, *out_file_name;
109    int num_read, i, j, out_len, translate_mode, same_name;			       /* char count for fread() */
110    unsigned char * dos_to_iso;
111 	int	type;
112 	int	code_page_overide; /* over ride of default codepage */
113 #ifdef UNIX_BUILD
114 	int	kbdfd;
115 #endif
116 	char	sysinfo_str[MAXLEN];
117 
118 	same_name = FALSE;
119 	out_file_name = (char *)0;
120 
121     /*	The filename parameter is positionally dependent - it must be the
122      *	second argument, immediately following the program name. Except
123      *	when a char set switch is passed then the file name must be third
124      *	argument.
125      */
126 
127 	argv++;
128 	in_stream = stdin;
129 	out_stream = stdout;
130 	j = 0;  /* count for file names 0 -> source 1-> dest */
131 	translate_mode = CONTENTS_ISO; /*default trans mode*/
132 	code_page_overide = 0;
133 	for (i=1; i<argc; i++) {
134       		if (*argv[0] == '-') {
135 			if (argc > 1 && !strncmp(*argv,"-iso",4)) {
136 				translate_mode = CONTENTS_ISO;
137 				argv++;
138 			} else if (argc > 1 && !strncmp(*argv,"-7",2)) {
139 				translate_mode = CONTENTS_ASCII;
140 				argv++;
141 			} else if (argc > 1 && !strncmp(*argv,"-ascii",6)) {
142 				translate_mode = CONTENTS_DOS;
143 				argv++;
144 			} else if (argc > 1 && !strncmp(*argv,"-437",4)) {
145 				code_page_overide = CODE_PAGE_US;
146 				argv++;
147 			} else if (argc > 1 && !strncmp(*argv,"-850",4)) {
148 				code_page_overide = CODE_PAGE_MULTILINGUAL;
149 				argv++;
150 			} else if (argc > 1 && !strncmp(*argv,"-860",4)) {
151 				code_page_overide = CODE_PAGE_PORTUGAL;
152 				argv++;
153 			} else if (argc > 1 && !strncmp(*argv,"-863",4)) {
154 				code_page_overide = CODE_PAGE_CANADA_FRENCH;
155 				argv++;
156 			} else if (argc > 1 && !strncmp(*argv,"-865",4)) {
157 				code_page_overide = CODE_PAGE_NORWAY;
158 				argv++;
159 			} else
160 				argv++;
161 			continue;
162 		}else{  /* not a command so must be filename */
163 			switch(j){
164 				case IN_FILE:	/* open in file from cmdline */
165 		       			in_file_name = *argv;
166 		       			j++;  /* next file name is outfile */
167 			       	break;
168 
169 				case OUT_FILE:	/* open out file from cmdline */
170 					out_file_name = *argv;
171 					j++;
172 			   	break;
173 
174 				default:
175 					usage();
176 			}
177 		}
178 
179 
180 	argv++;
181 	}
182 
183 	/* input file is specified */
184 	if (j > 0) {
185 		in_stream = fopen(in_file_name, "r");
186 		if (in_stream == NULL)
187 			error("Couldn't open input file %s.", in_file_name);
188 	}
189 
190 	/* output file is secified */
191 	if (j > 1) {
192 		if(!strcmp(in_file_name, out_file_name)){
193 			/* input and output have same name */
194 			if (access(out_file_name, 2))
195 				error("%s not writable.", out_file_name);
196 			strcpy(out_file_name, "/tmp/udXXXXXX");
197 			tmpfd = mkstemp(out_file_name);
198 			if (tmpfd == -1) {
199 				error("Couldn't create output file %s.",
200 				    out_file_name);
201 			}
202 			(void) close(tmpfd);
203 			same_name = TRUE;
204 		} else
205 			same_name = FALSE;
206 		out_stream = fopen(out_file_name, "w");
207 		if (out_stream == NULL) {
208 			(void) unlink(out_file_name);
209 			error("Couldn't open output file %s.", out_file_name);
210 		}
211 	}
212 
213 #ifdef _F_BIN
214 	setmode(fileno(in_stream), O_BINARY);
215 	setmode(fileno(out_stream), O_BINARY);
216 #endif
217 
218 #ifdef UNIX_BUILD
219 	if(!code_page_overide){
220 		if (sysinfo(SI_ARCHITECTURE,sysinfo_str,MAXLEN)  < 0) {
221 			fprintf(stderr,"could not obtain system information\n");
222 			(void) unlink(out_file_name);
223 			exit(1);
224 
225 		}
226 		if (strcmp(sysinfo_str,"i386")) {
227 			if ((kbdfd = open("/dev/kbd", O_WRONLY)) < 0) {
228 				fprintf(stderr, "could not open /dev/kbd to "
229 				    "get keyboard type US keyboard assumed\n");
230 			}
231 			if (ioctl(kbdfd, KIOCLAYOUT, &type) < 0) {
232 				fprintf(stderr,"could not get keyboard type US keyboard assumed\n");
233 			}
234 		} else {
235 			type = 0;
236 		}
237 		switch(type){
238 			case	0:
239 			case	1:	/* United States */
240 				dos_to_iso = &dos_to_iso_cp_437[0];
241 			break;
242 
243 			case	2:	/* Belgian French */
244 				dos_to_iso = &dos_to_iso_cp_437[0];
245 			break;
246 
247 			case	3:	/* Canadian French */
248 				dos_to_iso = &dos_to_iso_cp_863[0];
249 			break;
250 
251 			case	4:	/* Danish */
252 				dos_to_iso = &dos_to_iso_cp_865[0];
253 			break;
254 
255 			case	5:	/* German */
256 				dos_to_iso = &dos_to_iso_cp_437[0];
257 			break;
258 
259 			case	6:	/* Italian */
260 				dos_to_iso = &dos_to_iso_cp_437[0];
261 			break;
262 
263 			case	7:	/* Netherlands Dutch */
264 				dos_to_iso = &dos_to_iso_cp_437[0];
265 			break;
266 
267 			case	8:	/* Norwegian */
268 				dos_to_iso = &dos_to_iso_cp_865[0];
269 			break;
270 
271 			case	9:	/* Portuguese */
272 				dos_to_iso = &dos_to_iso_cp_860[0];
273 			break;
274 
275 			case	10:	/* Spanish */
276 				dos_to_iso = &dos_to_iso_cp_437[0];
277 			break;
278 
279 			case	11:	/* Swedish Finnish */
280 				dos_to_iso = &dos_to_iso_cp_437[0];
281 			break;
282 
283 			case	12:	/* Swiss French */
284 				dos_to_iso = &dos_to_iso_cp_437[0];
285 			break;
286 
287 			case	13:	/* Swiss German */
288 				dos_to_iso = &dos_to_iso_cp_437[0];
289 			break;
290 
291 			case	14:	/* United Kingdom */
292 				dos_to_iso = &dos_to_iso_cp_437[0];
293 
294 			break;
295 
296 			default:
297 				dos_to_iso = &dos_to_iso_cp_437[0];
298 			break;
299 		}
300 	}else{
301 		switch(code_page_overide){
302 			case CODE_PAGE_US:
303 				dos_to_iso = &dos_to_iso_cp_437[0];
304 			break;
305 
306 			case CODE_PAGE_MULTILINGUAL:
307 				dos_to_iso = &dos_to_iso_cp_850[0];
308 			break;
309 
310 			case CODE_PAGE_PORTUGAL:
311 				dos_to_iso = &dos_to_iso_cp_860[0];
312 			break;
313 
314 			case CODE_PAGE_CANADA_FRENCH:
315 				dos_to_iso = &dos_to_iso_cp_863[0];
316 			break;
317 
318 			case CODE_PAGE_NORWAY:
319 				dos_to_iso = &dos_to_iso_cp_865[0];
320 			break;
321 		}
322 	}
323 
324 #endif
325 #ifdef DOS_BUILD
326 	if(!code_page_overide){
327 		{
328 		union REGS regs;
329 		regs.h.ah = 0x66;	/* get/set global code page */
330 		regs.h.al = 0x01;		/* get */
331 		intdos(&regs, &regs);
332 		type = regs.x.bx;
333 		}
334 		switch(type){
335 			case	437:	/* United States */
336 				dos_to_iso = &dos_to_iso_cp_437[0];
337 			break;
338 
339 			case	850:	/* Multilingual */
340 				dos_to_iso = &dos_to_iso_cp_850[0];
341 			break;
342 
343 			case	860:	/* Portuguese */
344 				dos_to_iso = &dos_to_iso_cp_860[0];
345 			break;
346 
347 			case	863:	/* Canadian French */
348 				dos_to_iso = &dos_to_iso_cp_863[0];
349 			break;
350 
351 			case	865:	/* Danish */
352 				dos_to_iso = &dos_to_iso_cp_865[0];
353 			break;
354 
355 			default:
356 				dos_to_iso = &dos_to_iso_cp_437[0];
357 			break;
358 		}
359 	}else{
360 		switch(code_page_overide){
361 			case CODE_PAGE_US:
362 				dos_to_iso = &dos_to_iso_cp_437[0];
363 			break;
364 
365 			case CODE_PAGE_MULTILINGUAL:
366 				dos_to_iso = &dos_to_iso_cp_850[0];
367 			break;
368 
369 			case CODE_PAGE_PORTUGAL:
370 				dos_to_iso = &dos_to_iso_cp_860[0];
371 			break;
372 
373 			case CODE_PAGE_CANADA_FRENCH:
374 				dos_to_iso = &dos_to_iso_cp_863[0];
375 			break;
376 
377 			case CODE_PAGE_NORWAY:
378 				dos_to_iso = &dos_to_iso_cp_865[0];
379 			break;
380 		}
381 	}
382 
383 
384 #endif
385 
386     /*	While not EOF, read in chars and send them to out_stream
387      *	if current char is not a CR.
388      */
389 
390     do {
391 		num_read = fread(&tmp_buff[0], 1, 100, in_stream);
392 		i = 0;
393 		out_len = 0;
394 		src_str = dest_str = &tmp_buff[0];
395 		switch (translate_mode){
396 			case CONTENTS_ISO:
397 				{
398 				while ( i++ != num_read ){
399 					if( *src_str == '\r'){
400 						src_str++;
401 						}
402 					else{
403 						out_len++;
404 						*dest_str++ = dos_to_iso[*src_str++];
405 						}
406 					}
407 				}
408 				break;
409 
410 			case CONTENTS_ASCII:
411 				{
412 				while ( i++ != num_read){
413 					if( *src_str == '\r'){
414 						src_str++;
415 						continue;
416 						}
417 					else if ( *src_str > 127 ){
418 						*dest_str++ = (unsigned char) ' ';
419 						src_str++;
420 						out_len++;
421 						}
422 					else{
423 						out_len++;
424 						*dest_str++ = *src_str++;
425 						}
426 					}
427 				}
428 				break;
429 
430 			case CONTENTS_DOS:
431 				{
432 				while ( i++ != num_read){
433 					if( *src_str == '\r'){
434 						src_str++;
435 						continue;
436 						}
437 						*dest_str++ =	*src_str++;
438 						out_len++;
439 					}
440 				}
441 				break;
442 			}
443 		if (out_len > num_read)
444 			out_len = num_read;
445 		if (tmp_buff[out_len-2] == DOS_EOF)
446 			out_len -= 2;
447 		else if (tmp_buff[out_len-1] == DOS_EOF)
448 			out_len -= 1;
449 
450 		if( out_len > 0 &&
451 		    out_len != (i= fwrite(&tmp_buff[0], 1, out_len, out_stream)))
452 			error("Error writing %s.", out_file_name);
453 
454 		} while (!feof(in_stream));
455 
456 	fclose(out_stream);
457 	fclose(in_stream);
458 	if(same_name){
459 		unlink(in_file_name);
460 		in_stream = fopen(out_file_name, "r");
461 		out_stream = fopen(in_file_name, "w");
462 #ifdef _F_BIN
463 		setmode(fileno(in_stream), O_BINARY);
464 		setmode(fileno(out_stream), O_BINARY);
465 #endif
466 		while ((num_read = (unsigned)fread(tmp_buff, 1, sizeof tmp_buff, in_stream)) != 0) {
467 		   if( num_read != fwrite(tmp_buff, 1, num_read, out_stream))
468 			error("Error writing %s.", in_file_name);
469 		}
470 		fclose(out_stream);
471 		fclose(in_stream);
472 		unlink(out_file_name);
473 	}
474 	return (0);
475 }
476 
/*
 * Print a fatal diagnostic on stderr and exit with status 1.
 *
 * The output is "dos2unix: ", then `format' expanded with the single
 * string argument `args', then two spaces, the strerror() text for errno
 * and ".\n".  `format' must contain at most one %s conversion.
 *
 * errno is captured on entry because the fprintf() calls that precede the
 * strerror() lookup may themselves change it.
 */
void
error(char *format, char *args)
{
	int saved_errno = errno;

	fprintf(stderr, "dos2unix: ");
	fprintf(stderr, format, args);
	fprintf(stderr, "  %s.\n", strerror(saved_errno));
	exit(1);
}
486 
/*
 * Write the command-line synopsis to stderr and terminate the program
 * with exit status 1.
 */
void
usage(void)
{
	static const char synopsis[] =
	    "usage: dos2unix [ -ascii ] [ -iso ] [ -7 ] [ originalfile [ convertedfile ] ]\n";

	(void) fputs(synopsis, stderr);
	exit(1);
}
492 
493