1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2003 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Converts files from one char set to another 31 * 32 * Written 11/09/87 Eddy Bell 33 * 34 */ 35 36 37 /* 38 * INCLUDED and DEFINES 39 */ 40 #include <stdio.h> 41 #include <fcntl.h> 42 #include <sys/systeminfo.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <errno.h> 46 47 /*#include <io.h> for microsoft c 4.0 */ 48 49 #define CONTENTS_ASCII 0 50 #define CONTENTS_ASCII8 1 51 #define CONTENTS_ISO 2 52 #define CONTENTS_DOS 3 53 #ifdef _F_BIN 54 #define DOS_BUILD 1 55 #else 56 #define UNIX_BUILD 1 57 #endif 58 59 /****************************************************************************** 60 * INCLUDES AND DEFINES 61 ******************************************************************************/ 62 #ifdef UNIX_BUILD 63 #include <sys/types.h> 64 #include <sys/kbio.h> 65 #include <sys/time.h> 66 #include <fcntl.h> 67 #include "../sys/dos_iso.h" 68 #endif 69 70 #ifdef DOS_BUILD 71 #include <dos.h> 72 #include "..\sys\dos_iso.h" 73 #endif 74 75 76 #define GLOBAL 77 #define LOCAL static 78 #define VOID int 79 #define BOOL int 80 81 #define FALSE 0 82 #define TRUE ~FALSE 83 84 #define CR 0x0D 85 #define LF 0x0A 86 #define DOS_EOF 0x1A 87 #define MAXLEN 1024 88 89 90 /****************************************************************************** 91 * FUNCTION AND VARIABLE DECLARATIONS 92 ******************************************************************************/ 93 static void error(); 94 static void usage(); 95 static int tmpfd = -1; 96 97 /****************************************************************************** 98 * ENTRY POINTS 99 ******************************************************************************/ 100 101 int 102 main(int argc, char **argv) 103 { 104 FILE *in_stream = NULL; 105 FILE *out_stream = NULL; 106 unsigned char tmp_buff[512]; 107 unsigned char *src_str, *dest_str; 108 char *in_file_name, *out_file_name; 109 int num_read, i, j, out_len, translate_mode, same_name; /* char count for fread() */ 110 unsigned char * dos_to_iso; 111 int type; 112 int code_page_overide; /* over ride of default codepage */ 113 #ifdef UNIX_BUILD 114 int kbdfd; 115 #endif 116 char sysinfo_str[MAXLEN]; 117 118 same_name = FALSE; 119 out_file_name = (char *)0; 120 121 /* The filename parameter is positionally dependent - it must be the 122 * second argument, immediately following the program name. Except 123 * when a char set switch is passed then the file name must be third 124 * argument. 125 */ 126 127 argv++; 128 in_stream = stdin; 129 out_stream = stdout; 130 j = 0; /* count for file names 0 -> source 1-> dest */ 131 translate_mode = CONTENTS_ISO; /*default trans mode*/ 132 code_page_overide = 0; 133 for (i=1; i<argc; i++) { 134 if (*argv[0] == '-') { 135 if (argc > 1 && !strncmp(*argv,"-iso",4)) { 136 translate_mode = CONTENTS_ISO; 137 argv++; 138 } else if (argc > 1 && !strncmp(*argv,"-7",2)) { 139 translate_mode = CONTENTS_ASCII; 140 argv++; 141 } else if (argc > 1 && !strncmp(*argv,"-ascii",6)) { 142 translate_mode = CONTENTS_DOS; 143 argv++; 144 } else if (argc > 1 && !strncmp(*argv,"-437",4)) { 145 code_page_overide = CODE_PAGE_US; 146 argv++; 147 } else if (argc > 1 && !strncmp(*argv,"-850",4)) { 148 code_page_overide = CODE_PAGE_MULTILINGUAL; 149 argv++; 150 } else if (argc > 1 && !strncmp(*argv,"-860",4)) { 151 code_page_overide = CODE_PAGE_PORTUGAL; 152 argv++; 153 } else if (argc > 1 && !strncmp(*argv,"-863",4)) { 154 code_page_overide = CODE_PAGE_CANADA_FRENCH; 155 argv++; 156 } else if (argc > 1 && !strncmp(*argv,"-865",4)) { 157 code_page_overide = CODE_PAGE_NORWAY; 158 argv++; 159 } else 160 argv++; 161 continue; 162 }else{ /* not a command so must be filename */ 163 switch(j){ 164 case IN_FILE: /* open in file from cmdline */ 165 in_file_name = *argv; 166 j++; /* next file name is outfile */ 167 break; 168 169 case OUT_FILE: /* open out file from cmdline */ 170 out_file_name = *argv; 171 j++; 172 break; 173 174 default: 175 usage(); 176 } 177 } 178 179 180 argv++; 181 } 182 183 /* input file is specified */ 184 if (j > 0) { 185 in_stream = fopen(in_file_name, "r"); 186 if (in_stream == NULL) 187 error("Couldn't open input file %s.", in_file_name); 188 } 189 190 /* output file is secified */ 191 if (j > 1) { 192 if(!strcmp(in_file_name, out_file_name)){ 193 /* input and output have same name */ 194 if (access(out_file_name, 2)) 195 error("%s not writable.", out_file_name); 196 strcpy(out_file_name, "/tmp/udXXXXXX"); 197 tmpfd = mkstemp(out_file_name); 198 if (tmpfd == -1) { 199 error("Couldn't create output file %s.", 200 out_file_name); 201 } 202 (void) close(tmpfd); 203 same_name = TRUE; 204 } else 205 same_name = FALSE; 206 out_stream = fopen(out_file_name, "w"); 207 if (out_stream == NULL) { 208 (void) unlink(out_file_name); 209 error("Couldn't open output file %s.", out_file_name); 210 } 211 } 212 213 #ifdef _F_BIN 214 setmode(fileno(in_stream), O_BINARY); 215 setmode(fileno(out_stream), O_BINARY); 216 #endif 217 218 #ifdef UNIX_BUILD 219 if(!code_page_overide){ 220 if (sysinfo(SI_ARCHITECTURE,sysinfo_str,MAXLEN) < 0) { 221 fprintf(stderr,"could not obtain system information\n"); 222 (void) unlink(out_file_name); 223 exit(1); 224 225 } 226 if (strcmp(sysinfo_str,"i386")) { 227 if ((kbdfd = open("/dev/kbd", O_WRONLY)) < 0) { 228 fprintf(stderr, "could not open /dev/kbd to " 229 "get keyboard type US keyboard assumed\n"); 230 } 231 if (ioctl(kbdfd, KIOCLAYOUT, &type) < 0) { 232 fprintf(stderr,"could not get keyboard type US keyboard assumed\n"); 233 } 234 } else { 235 type = 0; 236 } 237 switch(type){ 238 case 0: 239 case 1: /* United States */ 240 dos_to_iso = &dos_to_iso_cp_437[0]; 241 break; 242 243 case 2: /* Belgian French */ 244 dos_to_iso = &dos_to_iso_cp_437[0]; 245 break; 246 247 case 3: /* Canadian French */ 248 dos_to_iso = &dos_to_iso_cp_863[0]; 249 break; 250 251 case 4: /* Danish */ 252 dos_to_iso = &dos_to_iso_cp_865[0]; 253 break; 254 255 case 5: /* German */ 256 dos_to_iso = &dos_to_iso_cp_437[0]; 257 break; 258 259 case 6: /* Italian */ 260 dos_to_iso = &dos_to_iso_cp_437[0]; 261 break; 262 263 case 7: /* Netherlands Dutch */ 264 dos_to_iso = &dos_to_iso_cp_437[0]; 265 break; 266 267 case 8: /* Norwegian */ 268 dos_to_iso = &dos_to_iso_cp_865[0]; 269 break; 270 271 case 9: /* Portuguese */ 272 dos_to_iso = &dos_to_iso_cp_860[0]; 273 break; 274 275 case 10: /* Spanish */ 276 dos_to_iso = &dos_to_iso_cp_437[0]; 277 break; 278 279 case 11: /* Swedish Finnish */ 280 dos_to_iso = &dos_to_iso_cp_437[0]; 281 break; 282 283 case 12: /* Swiss French */ 284 dos_to_iso = &dos_to_iso_cp_437[0]; 285 break; 286 287 case 13: /* Swiss German */ 288 dos_to_iso = &dos_to_iso_cp_437[0]; 289 break; 290 291 case 14: /* United Kingdom */ 292 dos_to_iso = &dos_to_iso_cp_437[0]; 293 294 break; 295 296 default: 297 dos_to_iso = &dos_to_iso_cp_437[0]; 298 break; 299 } 300 }else{ 301 switch(code_page_overide){ 302 case CODE_PAGE_US: 303 dos_to_iso = &dos_to_iso_cp_437[0]; 304 break; 305 306 case CODE_PAGE_MULTILINGUAL: 307 dos_to_iso = &dos_to_iso_cp_850[0]; 308 break; 309 310 case CODE_PAGE_PORTUGAL: 311 dos_to_iso = &dos_to_iso_cp_860[0]; 312 break; 313 314 case CODE_PAGE_CANADA_FRENCH: 315 dos_to_iso = &dos_to_iso_cp_863[0]; 316 break; 317 318 case CODE_PAGE_NORWAY: 319 dos_to_iso = &dos_to_iso_cp_865[0]; 320 break; 321 } 322 } 323 324 #endif 325 #ifdef DOS_BUILD 326 if(!code_page_overide){ 327 { 328 union REGS regs; 329 regs.h.ah = 0x66; /* get/set global code page */ 330 regs.h.al = 0x01; /* get */ 331 intdos(®s, ®s); 332 type = regs.x.bx; 333 } 334 switch(type){ 335 case 437: /* United States */ 336 dos_to_iso = &dos_to_iso_cp_437[0]; 337 break; 338 339 case 850: /* Multilingual */ 340 dos_to_iso = &dos_to_iso_cp_850[0]; 341 break; 342 343 case 860: /* Portuguese */ 344 dos_to_iso = &dos_to_iso_cp_860[0]; 345 break; 346 347 case 863: /* Canadian French */ 348 dos_to_iso = &dos_to_iso_cp_863[0]; 349 break; 350 351 case 865: /* Danish */ 352 dos_to_iso = &dos_to_iso_cp_865[0]; 353 break; 354 355 default: 356 dos_to_iso = &dos_to_iso_cp_437[0]; 357 break; 358 } 359 }else{ 360 switch(code_page_overide){ 361 case CODE_PAGE_US: 362 dos_to_iso = &dos_to_iso_cp_437[0]; 363 break; 364 365 case CODE_PAGE_MULTILINGUAL: 366 dos_to_iso = &dos_to_iso_cp_850[0]; 367 break; 368 369 case CODE_PAGE_PORTUGAL: 370 dos_to_iso = &dos_to_iso_cp_860[0]; 371 break; 372 373 case CODE_PAGE_CANADA_FRENCH: 374 dos_to_iso = &dos_to_iso_cp_863[0]; 375 break; 376 377 case CODE_PAGE_NORWAY: 378 dos_to_iso = &dos_to_iso_cp_865[0]; 379 break; 380 } 381 } 382 383 384 #endif 385 386 /* While not EOF, read in chars and send them to out_stream 387 * if current char is not a CR. 388 */ 389 390 do { 391 num_read = fread(&tmp_buff[0], 1, 100, in_stream); 392 i = 0; 393 out_len = 0; 394 src_str = dest_str = &tmp_buff[0]; 395 switch (translate_mode){ 396 case CONTENTS_ISO: 397 { 398 while ( i++ != num_read ){ 399 if( *src_str == '\r'){ 400 src_str++; 401 } 402 else{ 403 out_len++; 404 *dest_str++ = dos_to_iso[*src_str++]; 405 } 406 } 407 } 408 break; 409 410 case CONTENTS_ASCII: 411 { 412 while ( i++ != num_read){ 413 if( *src_str == '\r'){ 414 src_str++; 415 continue; 416 } 417 else if ( *src_str > 127 ){ 418 *dest_str++ = (unsigned char) ' '; 419 src_str++; 420 out_len++; 421 } 422 else{ 423 out_len++; 424 *dest_str++ = *src_str++; 425 } 426 } 427 } 428 break; 429 430 case CONTENTS_DOS: 431 { 432 while ( i++ != num_read){ 433 if( *src_str == '\r'){ 434 src_str++; 435 continue; 436 } 437 *dest_str++ = *src_str++; 438 out_len++; 439 } 440 } 441 break; 442 } 443 if (out_len > num_read) 444 out_len = num_read; 445 if (tmp_buff[out_len-2] == DOS_EOF) 446 out_len -= 2; 447 else if (tmp_buff[out_len-1] == DOS_EOF) 448 out_len -= 1; 449 450 if( out_len > 0 && 451 out_len != (i= fwrite(&tmp_buff[0], 1, out_len, out_stream))) 452 error("Error writing %s.", out_file_name); 453 454 } while (!feof(in_stream)); 455 456 fclose(out_stream); 457 fclose(in_stream); 458 if(same_name){ 459 unlink(in_file_name); 460 in_stream = fopen(out_file_name, "r"); 461 out_stream = fopen(in_file_name, "w"); 462 #ifdef _F_BIN 463 setmode(fileno(in_stream), O_BINARY); 464 setmode(fileno(out_stream), O_BINARY); 465 #endif 466 while ((num_read = (unsigned)fread(tmp_buff, 1, sizeof tmp_buff, in_stream)) != 0) { 467 if( num_read != fwrite(tmp_buff, 1, num_read, out_stream)) 468 error("Error writing %s.", in_file_name); 469 } 470 fclose(out_stream); 471 fclose(in_stream); 472 unlink(out_file_name); 473 } 474 return (0); 475 } 476 477 void error(format, args) 478 char *format; 479 char *args; 480 { 481 fprintf(stderr, "dos2unix: "); 482 fprintf(stderr, format, args); 483 fprintf(stderr, " %s.\n", strerror(errno)); 484 exit(1); 485 } 486 487 void usage() 488 { 489 fprintf(stderr, "usage: dos2unix [ -ascii ] [ -iso ] [ -7 ] [ originalfile [ convertedfile ] ]\n"); 490 exit(1); 491 } 492 493