1 /* 2 * Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved. 3 * 4 * Licensed under the Apache License 2.0 (the "License"). You may not use 5 * this file except in compliance with the License. You can obtain a copy 6 * in the file LICENSE in the source distribution or at 7 * https://www.openssl.org/source/license.html 8 */ 9 10 #include <windows.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <malloc.h> 14 15 #if defined(CP_UTF8) 16 17 static UINT saved_cp; 18 static int newargc; 19 static char **newargv; 20 21 static void cleanup(void) 22 { 23 int i; 24 25 SetConsoleOutputCP(saved_cp); 26 27 for (i = 0; i < newargc; i++) 28 free(newargv[i]); 29 30 free(newargv); 31 } 32 33 /* 34 * Incrementally [re]allocate newargv and keep it NULL-terminated. 35 */ 36 static int validate_argv(int argc) 37 { 38 static int size = 0; 39 40 if (argc >= size) { 41 char **ptr; 42 43 while (argc >= size) 44 size += 64; 45 46 ptr = realloc(newargv, size * sizeof(newargv[0])); 47 if (ptr == NULL) 48 return 0; 49 50 (newargv = ptr)[argc] = NULL; 51 } else { 52 newargv[argc] = NULL; 53 } 54 55 return 1; 56 } 57 58 static int process_glob(WCHAR *wstr, int wlen) 59 { 60 int i, slash, udlen; 61 WCHAR saved_char; 62 WIN32_FIND_DATAW data; 63 HANDLE h; 64 65 /* 66 * Note that we support wildcard characters only in filename part 67 * of the path, and not in directories. Windows users are used to 68 * this, that's why recursive glob processing is not implemented. 69 */ 70 /* 71 * Start by looking for last slash or backslash, ... 72 */ 73 for (slash = 0, i = 0; i < wlen; i++) 74 if (wstr[i] == L'/' || wstr[i] == L'\\') 75 slash = i + 1; 76 /* 77 * ... then look for asterisk or question mark in the file name. 78 */ 79 for (i = slash; i < wlen; i++) 80 if (wstr[i] == L'*' || wstr[i] == L'?') 81 break; 82 83 if (i == wlen) 84 return 0; /* definitely not a glob */ 85 86 saved_char = wstr[wlen]; 87 wstr[wlen] = L'\0'; 88 h = FindFirstFileW(wstr, &data); 89 wstr[wlen] = saved_char; 90 if (h == INVALID_HANDLE_VALUE) 91 return 0; /* not a valid glob, just pass... */ 92 93 if (slash) 94 udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash, 95 NULL, 0, NULL, NULL); 96 else 97 udlen = 0; 98 99 do { 100 int uflen; 101 char *arg; 102 103 /* 104 * skip over . and .. 105 */ 106 if (data.cFileName[0] == L'.') { 107 if ((data.cFileName[1] == L'\0') || 108 (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0')) 109 continue; 110 } 111 112 if (!validate_argv(newargc + 1)) 113 break; 114 115 /* 116 * -1 below means "scan for trailing '\0' *and* count it", 117 * so that |uflen| covers even trailing '\0'. 118 */ 119 uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1, 120 NULL, 0, NULL, NULL); 121 122 arg = malloc(udlen + uflen); 123 if (arg == NULL) 124 break; 125 126 if (udlen) 127 WideCharToMultiByte(CP_UTF8, 0, wstr, slash, 128 arg, udlen, NULL, NULL); 129 130 WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1, 131 arg + udlen, uflen, NULL, NULL); 132 133 newargv[newargc++] = arg; 134 } while (FindNextFileW(h, &data)); 135 136 CloseHandle(h); 137 138 return 1; 139 } 140 141 void win32_utf8argv(int *argc, char **argv[]) 142 { 143 const WCHAR *wcmdline; 144 WCHAR *warg, *wend, *p; 145 int wlen, ulen, valid = 1; 146 char *arg; 147 148 if (GetEnvironmentVariableW(L"OPENSSL_WIN32_UTF8", NULL, 0) == 0) 149 return; 150 151 newargc = 0; 152 newargv = NULL; 153 if (!validate_argv(newargc)) 154 return; 155 156 wcmdline = GetCommandLineW(); 157 if (wcmdline == NULL) return; 158 159 /* 160 * make a copy of the command line, since we might have to modify it... 161 */ 162 wlen = wcslen(wcmdline); 163 p = _alloca((wlen + 1) * sizeof(WCHAR)); 164 wcscpy(p, wcmdline); 165 166 while (*p != L'\0') { 167 int in_quote = 0; 168 169 if (*p == L' ' || *p == L'\t') { 170 p++; /* skip over whitespace */ 171 continue; 172 } 173 174 /* 175 * Note: because we may need to fiddle with the number of backslashes, 176 * the argument string is copied into itself. This is safe because 177 * the number of characters will never expand. 178 */ 179 warg = wend = p; 180 while (*p != L'\0' 181 && (in_quote || (*p != L' ' && *p != L'\t'))) { 182 switch (*p) { 183 case L'\\': 184 /* 185 * Microsoft documentation on how backslashes are treated 186 * is: 187 * 188 * + Backslashes are interpreted literally, unless they 189 * immediately precede a double quotation mark. 190 * + If an even number of backslashes is followed by a double 191 * quotation mark, one backslash is placed in the argv array 192 * for every pair of backslashes, and the double quotation 193 * mark is interpreted as a string delimiter. 194 * + If an odd number of backslashes is followed by a double 195 * quotation mark, one backslash is placed in the argv array 196 * for every pair of backslashes, and the double quotation 197 * mark is "escaped" by the remaining backslash, causing a 198 * literal double quotation mark (") to be placed in argv. 199 * 200 * Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx 201 * 202 * Though referred page doesn't mention it, multiple qouble 203 * quotes are also special. Pair of double quotes in quoted 204 * string is counted as single double quote. 205 */ 206 { 207 const WCHAR *q = p; 208 int i; 209 210 while (*p == L'\\') 211 p++; 212 213 if (*p == L'"') { 214 int i; 215 216 for (i = (p - q) / 2; i > 0; i--) 217 *wend++ = L'\\'; 218 219 /* 220 * if odd amount of backslashes before the quote, 221 * said quote is part of the argument, not a delimiter 222 */ 223 if ((p - q) % 2 == 1) 224 *wend++ = *p++; 225 } else { 226 for (i = p - q; i > 0; i--) 227 *wend++ = L'\\'; 228 } 229 } 230 break; 231 case L'"': 232 /* 233 * Without the preceding backslash (or when preceded with an 234 * even number of backslashes), the double quote is a simple 235 * string delimiter and just slightly change the parsing state 236 */ 237 if (in_quote && p[1] == L'"') 238 *wend++ = *p++; 239 else 240 in_quote = !in_quote; 241 p++; 242 break; 243 default: 244 /* 245 * Any other non-delimiter character is just taken verbatim 246 */ 247 *wend++ = *p++; 248 } 249 } 250 251 wlen = wend - warg; 252 253 if (wlen == 0 || !process_glob(warg, wlen)) { 254 if (!validate_argv(newargc + 1)) { 255 valid = 0; 256 break; 257 } 258 259 ulen = 0; 260 if (wlen > 0) { 261 ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen, 262 NULL, 0, NULL, NULL); 263 if (ulen <= 0) 264 continue; 265 } 266 267 arg = malloc(ulen + 1); 268 if (arg == NULL) { 269 valid = 0; 270 break; 271 } 272 273 if (wlen > 0) 274 WideCharToMultiByte(CP_UTF8, 0, warg, wlen, 275 arg, ulen, NULL, NULL); 276 arg[ulen] = '\0'; 277 278 newargv[newargc++] = arg; 279 } 280 } 281 282 if (valid) { 283 saved_cp = GetConsoleOutputCP(); 284 SetConsoleOutputCP(CP_UTF8); 285 286 *argc = newargc; 287 *argv = newargv; 288 289 atexit(cleanup); 290 } else if (newargv != NULL) { 291 int i; 292 293 for (i = 0; i < newargc; i++) 294 free(newargv[i]); 295 296 free(newargv); 297 298 newargc = 0; 299 newargv = NULL; 300 } 301 302 return; 303 } 304 #else 305 void win32_utf8argv(int *argc, char **argv[]) 306 { return; } 307 #endif 308