1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 /* 28 * Converts files from one char set to another 29 * 30 * Written 11/09/87 Eddy Bell 31 * 32 */ 33 34 35 /* 36 * INCLUDED and DEFINES 37 */ 38 #include <stdio.h> 39 #include <fcntl.h> 40 #include <sys/systeminfo.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <errno.h> 44 45 /*#include <io.h> for microsoft c 4.0 */ 46 47 #define CONTENTS_ASCII 0 48 #define CONTENTS_ASCII8 1 49 #define CONTENTS_ISO 2 50 #define CONTENTS_DOS 3 51 #ifdef _F_BIN 52 #define DOS_BUILD 1 53 #else 54 #define UNIX_BUILD 1 55 #endif 56 57 /****************************************************************************** 58 * INCLUDES AND DEFINES 59 ******************************************************************************/ 60 #ifdef UNIX_BUILD 61 #include <sys/types.h> 62 #include <sys/kbio.h> 63 #include <sys/time.h> 64 #include <fcntl.h> 65 #include "../sys/dos_iso.h" 66 #endif 67 68 #ifdef DOS_BUILD 69 #include <dos.h> 70 #include "..\sys\dos_iso.h" 71 #endif 72 73 74 #define GLOBAL 75 #define LOCAL static 76 #define VOID int 77 #define BOOL int 78 79 #define FALSE 0 80 #define TRUE ~FALSE 81 82 #define CR 0x0D 83 #define LF 0x0A 84 #define DOS_EOF 0x1A 85 #define MAXLEN 1024 86 87 88 /****************************************************************************** 89 * FUNCTION AND VARIABLE DECLARATIONS 90 ******************************************************************************/ 91 static void error(); 92 static void usage(); 93 static int tmpfd = -1; 94 95 /****************************************************************************** 96 * ENTRY POINTS 97 ******************************************************************************/ 98 99 int 100 main(int argc, char **argv) 101 { 102 FILE *in_stream = NULL; 103 FILE *out_stream = NULL; 104 unsigned char tmp_buff[512]; 105 unsigned char *src_str, *dest_str; 106 char *in_file_name, *out_file_name; 107 int num_read, i, j, out_len, translate_mode, same_name; /* char count for fread() */ 108 unsigned char * dos_to_iso; 109 int type; 110 int code_page_overide; /* over ride of default codepage */ 111 #ifdef UNIX_BUILD 112 int kbdfd; 113 #endif 114 char sysinfo_str[MAXLEN]; 115 116 same_name = FALSE; 117 out_file_name = (char *)0; 118 119 /* The filename parameter is positionally dependent - it must be the 120 * second argument, immediately following the program name. Except 121 * when a char set switch is passed then the file name must be third 122 * argument. 123 */ 124 125 argv++; 126 in_stream = stdin; 127 out_stream = stdout; 128 j = 0; /* count for file names 0 -> source 1-> dest */ 129 translate_mode = CONTENTS_ISO; /*default trans mode*/ 130 code_page_overide = 0; 131 for (i=1; i<argc; i++) { 132 if (*argv[0] == '-') { 133 if (argc > 1 && !strncmp(*argv,"-iso",4)) { 134 translate_mode = CONTENTS_ISO; 135 argv++; 136 } else if (argc > 1 && !strncmp(*argv,"-7",2)) { 137 translate_mode = CONTENTS_ASCII; 138 argv++; 139 } else if (argc > 1 && !strncmp(*argv,"-ascii",6)) { 140 translate_mode = CONTENTS_DOS; 141 argv++; 142 } else if (argc > 1 && !strncmp(*argv,"-437",4)) { 143 code_page_overide = CODE_PAGE_US; 144 argv++; 145 } else if (argc > 1 && !strncmp(*argv,"-850",4)) { 146 code_page_overide = CODE_PAGE_MULTILINGUAL; 147 argv++; 148 } else if (argc > 1 && !strncmp(*argv,"-860",4)) { 149 code_page_overide = CODE_PAGE_PORTUGAL; 150 argv++; 151 } else if (argc > 1 && !strncmp(*argv,"-863",4)) { 152 code_page_overide = CODE_PAGE_CANADA_FRENCH; 153 argv++; 154 } else if (argc > 1 && !strncmp(*argv,"-865",4)) { 155 code_page_overide = CODE_PAGE_NORWAY; 156 argv++; 157 } else 158 argv++; 159 continue; 160 }else{ /* not a command so must be filename */ 161 switch(j){ 162 case IN_FILE: /* open in file from cmdline */ 163 in_file_name = *argv; 164 j++; /* next file name is outfile */ 165 break; 166 167 case OUT_FILE: /* open out file from cmdline */ 168 out_file_name = *argv; 169 j++; 170 break; 171 172 default: 173 usage(); 174 } 175 } 176 177 178 argv++; 179 } 180 181 /* input file is specified */ 182 if (j > 0) { 183 in_stream = fopen(in_file_name, "r"); 184 if (in_stream == NULL) 185 error("Couldn't open input file %s.", in_file_name); 186 } 187 188 /* output file is secified */ 189 if (j > 1) { 190 if(!strcmp(in_file_name, out_file_name)){ 191 /* input and output have same name */ 192 if (access(out_file_name, 2)) 193 error("%s not writable.", out_file_name); 194 strcpy(out_file_name, "/tmp/udXXXXXX"); 195 tmpfd = mkstemp(out_file_name); 196 if (tmpfd == -1) { 197 error("Couldn't create output file %s.", 198 out_file_name); 199 } 200 (void) close(tmpfd); 201 same_name = TRUE; 202 } else 203 same_name = FALSE; 204 out_stream = fopen(out_file_name, "w"); 205 if (out_stream == NULL) { 206 (void) unlink(out_file_name); 207 error("Couldn't open output file %s.", out_file_name); 208 } 209 } 210 211 #ifdef _F_BIN 212 setmode(fileno(in_stream), O_BINARY); 213 setmode(fileno(out_stream), O_BINARY); 214 #endif 215 216 #ifdef UNIX_BUILD 217 if(!code_page_overide){ 218 if (sysinfo(SI_ARCHITECTURE,sysinfo_str,MAXLEN) < 0) { 219 fprintf(stderr,"could not obtain system information\n"); 220 (void) unlink(out_file_name); 221 exit(1); 222 223 } 224 if (strcmp(sysinfo_str,"i386")) { 225 if ((kbdfd = open("/dev/kbd", O_WRONLY)) < 0) { 226 fprintf(stderr, "could not open /dev/kbd to " 227 "get keyboard type US keyboard assumed\n"); 228 } 229 if (ioctl(kbdfd, KIOCLAYOUT, &type) < 0) { 230 fprintf(stderr,"could not get keyboard type US keyboard assumed\n"); 231 } 232 } else { 233 type = 0; 234 } 235 switch(type){ 236 case 0: 237 case 1: /* United States */ 238 dos_to_iso = &dos_to_iso_cp_437[0]; 239 break; 240 241 case 2: /* Belgian French */ 242 dos_to_iso = &dos_to_iso_cp_437[0]; 243 break; 244 245 case 3: /* Canadian French */ 246 dos_to_iso = &dos_to_iso_cp_863[0]; 247 break; 248 249 case 4: /* Danish */ 250 dos_to_iso = &dos_to_iso_cp_865[0]; 251 break; 252 253 case 5: /* German */ 254 dos_to_iso = &dos_to_iso_cp_437[0]; 255 break; 256 257 case 6: /* Italian */ 258 dos_to_iso = &dos_to_iso_cp_437[0]; 259 break; 260 261 case 7: /* Netherlands Dutch */ 262 dos_to_iso = &dos_to_iso_cp_437[0]; 263 break; 264 265 case 8: /* Norwegian */ 266 dos_to_iso = &dos_to_iso_cp_865[0]; 267 break; 268 269 case 9: /* Portuguese */ 270 dos_to_iso = &dos_to_iso_cp_860[0]; 271 break; 272 273 case 10: /* Spanish */ 274 dos_to_iso = &dos_to_iso_cp_437[0]; 275 break; 276 277 case 11: /* Swedish Finnish */ 278 dos_to_iso = &dos_to_iso_cp_437[0]; 279 break; 280 281 case 12: /* Swiss French */ 282 dos_to_iso = &dos_to_iso_cp_437[0]; 283 break; 284 285 case 13: /* Swiss German */ 286 dos_to_iso = &dos_to_iso_cp_437[0]; 287 break; 288 289 case 14: /* United Kingdom */ 290 dos_to_iso = &dos_to_iso_cp_437[0]; 291 292 break; 293 294 default: 295 dos_to_iso = &dos_to_iso_cp_437[0]; 296 break; 297 } 298 }else{ 299 switch(code_page_overide){ 300 case CODE_PAGE_US: 301 dos_to_iso = &dos_to_iso_cp_437[0]; 302 break; 303 304 case CODE_PAGE_MULTILINGUAL: 305 dos_to_iso = &dos_to_iso_cp_850[0]; 306 break; 307 308 case CODE_PAGE_PORTUGAL: 309 dos_to_iso = &dos_to_iso_cp_860[0]; 310 break; 311 312 case CODE_PAGE_CANADA_FRENCH: 313 dos_to_iso = &dos_to_iso_cp_863[0]; 314 break; 315 316 case CODE_PAGE_NORWAY: 317 dos_to_iso = &dos_to_iso_cp_865[0]; 318 break; 319 } 320 } 321 322 #endif 323 #ifdef DOS_BUILD 324 if(!code_page_overide){ 325 { 326 union REGS regs; 327 regs.h.ah = 0x66; /* get/set global code page */ 328 regs.h.al = 0x01; /* get */ 329 intdos(®s, ®s); 330 type = regs.x.bx; 331 } 332 switch(type){ 333 case 437: /* United States */ 334 dos_to_iso = &dos_to_iso_cp_437[0]; 335 break; 336 337 case 850: /* Multilingual */ 338 dos_to_iso = &dos_to_iso_cp_850[0]; 339 break; 340 341 case 860: /* Portuguese */ 342 dos_to_iso = &dos_to_iso_cp_860[0]; 343 break; 344 345 case 863: /* Canadian French */ 346 dos_to_iso = &dos_to_iso_cp_863[0]; 347 break; 348 349 case 865: /* Danish */ 350 dos_to_iso = &dos_to_iso_cp_865[0]; 351 break; 352 353 default: 354 dos_to_iso = &dos_to_iso_cp_437[0]; 355 break; 356 } 357 }else{ 358 switch(code_page_overide){ 359 case CODE_PAGE_US: 360 dos_to_iso = &dos_to_iso_cp_437[0]; 361 break; 362 363 case CODE_PAGE_MULTILINGUAL: 364 dos_to_iso = &dos_to_iso_cp_850[0]; 365 break; 366 367 case CODE_PAGE_PORTUGAL: 368 dos_to_iso = &dos_to_iso_cp_860[0]; 369 break; 370 371 case CODE_PAGE_CANADA_FRENCH: 372 dos_to_iso = &dos_to_iso_cp_863[0]; 373 break; 374 375 case CODE_PAGE_NORWAY: 376 dos_to_iso = &dos_to_iso_cp_865[0]; 377 break; 378 } 379 } 380 381 382 #endif 383 384 /* While not EOF, read in chars and send them to out_stream 385 * if current char is not a CR. 386 */ 387 388 do { 389 num_read = fread(&tmp_buff[0], 1, 100, in_stream); 390 i = 0; 391 out_len = 0; 392 src_str = dest_str = &tmp_buff[0]; 393 switch (translate_mode){ 394 case CONTENTS_ISO: 395 { 396 while ( i++ != num_read ){ 397 if( *src_str == '\r'){ 398 src_str++; 399 } 400 else{ 401 out_len++; 402 *dest_str++ = dos_to_iso[*src_str++]; 403 } 404 } 405 } 406 break; 407 408 case CONTENTS_ASCII: 409 { 410 while ( i++ != num_read){ 411 if( *src_str == '\r'){ 412 src_str++; 413 continue; 414 } 415 else if ( *src_str > 127 ){ 416 *dest_str++ = (unsigned char) ' '; 417 src_str++; 418 out_len++; 419 } 420 else{ 421 out_len++; 422 *dest_str++ = *src_str++; 423 } 424 } 425 } 426 break; 427 428 case CONTENTS_DOS: 429 { 430 while ( i++ != num_read){ 431 if( *src_str == '\r'){ 432 src_str++; 433 continue; 434 } 435 *dest_str++ = *src_str++; 436 out_len++; 437 } 438 } 439 break; 440 } 441 if (out_len > num_read) 442 out_len = num_read; 443 if (tmp_buff[out_len-2] == DOS_EOF) 444 out_len -= 2; 445 else if (tmp_buff[out_len-1] == DOS_EOF) 446 out_len -= 1; 447 448 if( out_len > 0 && 449 out_len != (i= fwrite(&tmp_buff[0], 1, out_len, out_stream))) 450 error("Error writing %s.", out_file_name); 451 452 } while (!feof(in_stream)); 453 454 fclose(out_stream); 455 fclose(in_stream); 456 if(same_name){ 457 unlink(in_file_name); 458 in_stream = fopen(out_file_name, "r"); 459 out_stream = fopen(in_file_name, "w"); 460 #ifdef _F_BIN 461 setmode(fileno(in_stream), O_BINARY); 462 setmode(fileno(out_stream), O_BINARY); 463 #endif 464 while ((num_read = (unsigned)fread(tmp_buff, 1, sizeof tmp_buff, in_stream)) != 0) { 465 if( num_read != fwrite(tmp_buff, 1, num_read, out_stream)) 466 error("Error writing %s.", in_file_name); 467 } 468 fclose(out_stream); 469 fclose(in_stream); 470 unlink(out_file_name); 471 } 472 return (0); 473 } 474 475 void error(format, args) 476 char *format; 477 char *args; 478 { 479 fprintf(stderr, "dos2unix: "); 480 fprintf(stderr, format, args); 481 fprintf(stderr, " %s.\n", strerror(errno)); 482 exit(1); 483 } 484 485 void usage() 486 { 487 fprintf(stderr, "usage: dos2unix [ -ascii ] [ -iso ] [ -7 ] [ originalfile [ convertedfile ] ]\n"); 488 exit(1); 489 } 490 491