//===- AutoConvert.cpp - Auto conversion between ASCII/EBCDIC -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains functions used for auto conversion between
// ASCII/EBCDIC codepages specific to z/OS.
//
//===----------------------------------------------------------------------===//

#ifdef __MVS__

#include "llvm/Support/AutoConvert.h"
#include <cassert>
#include <cerrno>
#include <fcntl.h>
#include <sys/stat.h>
#include <unistd.h>

using namespace llvm;

/// Conversion command (cvtcmd) that was in effect for stdin, stdout and stderr
/// before enablezOSAutoConversion() changed it, indexed by file descriptor.
/// A value of -1 means nothing has been saved for that descriptor.
static int savedStdHandleAutoConversionMode[3] = {-1, -1, -1};

/// Returns true if \p FD is one of STDIN_FILENO, STDOUT_FILENO or
/// STDERR_FILENO.
static bool isStdHandle(int FD) {
  return FD == STDIN_FILENO || FD == STDOUT_FILENO || FD == STDERR_FILENO;
}

/// Issues SETCVTOFF on \p FD through fcntl(F_CONTROL_CVT).
///
/// \returns the result of the fcntl() call.
int disablezOSAutoConversion(int FD) {
  static const struct f_cnvrt Convert = {
      SETCVTOFF, // cvtcmd
      0,         // pccsid
      0,         // fccsid
  };

  return fcntl(FD, F_CONTROL_CVT, &Convert);
}

/// Re-applies the conversion command that enablezOSAutoConversion() saved for
/// the standard handle \p FD.
///
/// \returns 0 if no command was saved for \p FD, -1 (with errno set to EINVAL)
/// if \p FD is not a standard handle, and otherwise the result of the
/// fcntl() call.
int restorezOSStdHandleAutoConversion(int FD) {
  assert(isStdHandle(FD) && "expected stdin, stdout or stderr");
  // The assert vanishes in release builds; never index the saved-mode table
  // with a descriptor outside of it.
  if (!isStdHandle(FD)) {
    errno = EINVAL;
    return -1;
  }

  if (savedStdHandleAutoConversionMode[FD] == -1)
    return 0;

  struct f_cnvrt Cvt = {
      savedStdHandleAutoConversionMode[FD], // cvtcmd
      0,                                    // pccsid
      0,                                    // fccsid
  };
  return fcntl(FD, F_CONTROL_CVT, &Cvt);
}

/// Turns on auto conversion for \p FD unless its current conversion state
/// shows that none is needed.
///
/// For a standard handle, the conversion command reported by the query is
/// remembered the first time it is about to be changed so that
/// restorezOSStdHandleAutoConversion() can put it back.
///
/// \returns -1 if the query fails, 0 if no conversion is needed, and
/// otherwise the result of the fcntl() call that applies the new settings.
int enablezOSAutoConversion(int FD) {
  struct f_cnvrt Query = {
      QUERYCVT, // cvtcmd
      0,        // pccsid
      0,        // fccsid
  };

  if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
    return -1;

  // We don't need conversion for UTF-8 tagged files.
  // TODO: Remove the assumption of ISO8859-1 = UTF-8 here when we fully resolve
  // problems related to UTF-8 tagged source files.
  // When the pccsid is not ISO8859-1, autoconversion is still needed.
  if (Query.pccsid == CCSID_ISO8859_1 &&
      (Query.fccsid == CCSID_UTF_8 || Query.fccsid == CCSID_ISO8859_1))
    return 0;

  const bool IsStd = isStdHandle(FD);

  // Save the state of std handles before we make changes to it.
  if (IsStd && savedStdHandleAutoConversionMode[FD] == -1)
    savedStdHandleAutoConversionMode[FD] = Query.cvtcmd;

  // stdout and stderr get SETCVTON; every other descriptor (including stdin)
  // gets SETCVTALL.
  if (FD == STDOUT_FILENO || FD == STDERR_FILENO)
    Query.cvtcmd = SETCVTON;
  else
    Query.cvtcmd = SETCVTALL;

  // Standard handles pass a pccsid of 0; all other descriptors request UTF-8.
  Query.pccsid = IsStd ? 0 : CCSID_UTF_8;

  // Assume untagged files to be IBM-1047 encoded.
  if (Query.fccsid == FT_UNTAGGED)
    Query.fccsid = CCSID_IBM_1047;

  return fcntl(FD, F_CONTROL_CVT, &Query);
}

/// Tags the file open on \p FD with \p CCSID via fcntl(F_SETTAG).
///
/// \param Text value stored in the tag's text flag. When true, \p CCSID must
/// be neither FT_UNTAGGED nor FT_BINARY (checked by assertion).
/// \returns the errno-based error code if fcntl() fails, otherwise a
/// default-constructed (success) std::error_code.
std::error_code llvm::setzOSFileTag(int FD, int CCSID, bool Text) {
  assert((!Text || (CCSID != FT_UNTAGGED && CCSID != FT_BINARY)) &&
         "FT_UNTAGGED and FT_BINARY are not allowed for text files");
  struct file_tag Tag;
  Tag.ft_ccsid = CCSID;
  Tag.ft_txtflag = Text;
  Tag.ft_deferred = 0;
  Tag.ft_rsvflags = 0;

  if (fcntl(FD, F_SETTAG, &Tag) == -1)
    return errnoAsErrorCode();
  return std::error_code();
}

/// Looks up the CCSID a file is tagged with.
///
/// \param FileName path handed to stat(); only used when \p FD is -1.
/// \param FD open descriptor to query, or -1 to fall back to \p FileName.
/// \returns the file's CCSID, or the errno-based error code if the underlying
/// fcntl() or stat() call fails.
ErrorOr<__ccsid_t> llvm::getzOSFileTag(const char *FileName, const int FD) {
  // If we have a file descriptor, use it to find out file tagging. Otherwise we
  // need to use stat() with the file path.
  if (FD != -1) {
    struct f_cnvrt Query = {
        QUERYCVT, // cvtcmd
        0,        // pccsid
        0,        // fccsid
    };
    if (fcntl(FD, F_CONTROL_CVT, &Query) == -1)
      return errnoAsErrorCode();
    return Query.fccsid;
  }

  struct stat Attr;
  if (stat(FileName, &Attr) == -1)
    return errnoAsErrorCode();
  return Attr.st_tag.ft_ccsid;
}

/// Decides from the file's tag whether its contents need conversion.
///
/// \returns false when the tag is CCSID_UTF_8, CCSID_ISO8859_1 or FT_BINARY,
/// true for every other tag, or the error reported by getzOSFileTag().
ErrorOr<bool> llvm::needzOSConversion(const char *FileName, const int FD) {
  ErrorOr<__ccsid_t> Ccsid = getzOSFileTag(FileName, FD);
  if (std::error_code EC = Ccsid.getError())
    return EC;
  // We don't need conversion for UTF-8 tagged files or binary files.
  // TODO: Remove the assumption of ISO8859-1 = UTF-8 here when we fully resolve
  // problems related to UTF-8 tagged source files.
  switch (*Ccsid) {
  case CCSID_UTF_8:
  case CCSID_ISO8859_1:
  case FT_BINARY:
    return false;
  default:
    return true;
  }
}

#endif //__MVS__