Home | History | Annotate | Line # | Download | only in apps
      1 /*
      2  * Copyright 2016 The OpenSSL Project Authors. All Rights Reserved.
      3  *
      4  * Licensed under the OpenSSL license (the "License").  You may not use
      5  * this file except in compliance with the License.  You can obtain a copy
      6  * in the file LICENSE in the source distribution or at
      7  * https://www.openssl.org/source/license.html
      8  */
      9 
     10 #include <windows.h>
     11 #include <stdlib.h>
     12 #include <string.h>
     13 #include <malloc.h>
     14 
     15 #if defined(CP_UTF8)
     16 
/* Console output code page that was active before the switch to CP_UTF8. */
static UINT saved_cp;
/* Number of argument strings currently stored in |newargv|. */
static int newargc;
/* Heap-allocated, NULL-terminated array of heap-allocated UTF-8 arguments. */
static char **newargv;
     20 
     21 static void cleanup(void)
     22 {
     23     int i;
     24 
     25     SetConsoleOutputCP(saved_cp);
     26 
     27     for (i = 0; i < newargc; i++)
     28         free(newargv[i]);
     29 
     30     free(newargv);
     31 }
     32 
     33 /*
     34  * Incrementally [re]allocate newargv and keep it NULL-terminated.
     35  */
     36 static int validate_argv(int argc)
     37 {
     38     static int size = 0;
     39 
     40     if (argc >= size) {
     41         char **ptr;
     42 
     43         while (argc >= size)
     44             size += 64;
     45 
     46         ptr = realloc(newargv, size * sizeof(newargv[0]));
     47         if (ptr == NULL)
     48             return 0;
     49 
     50         (newargv = ptr)[argc] = NULL;
     51     } else {
     52         newargv[argc] = NULL;
     53     }
     54 
     55     return 1;
     56 }
     57 
     58 static int process_glob(WCHAR *wstr, int wlen)
     59 {
     60     int i, slash, udlen;
     61     WCHAR saved_char;
     62     WIN32_FIND_DATAW data;
     63     HANDLE h;
     64 
     65     /*
     66      * Note that we support wildcard characters only in filename part
     67      * of the path, and not in directories. Windows users are used to
     68      * this, that's why recursive glob processing is not implemented.
     69      */
     70     /*
     71      * Start by looking for last slash or backslash, ...
     72      */
     73     for (slash = 0, i = 0; i < wlen; i++)
     74         if (wstr[i] == L'/' || wstr[i] == L'\\')
     75             slash = i + 1;
     76     /*
     77      * ... then look for asterisk or question mark in the file name.
     78      */
     79     for (i = slash; i < wlen; i++)
     80         if (wstr[i] == L'*' || wstr[i] == L'?')
     81             break;
     82 
     83     if (i == wlen)
     84         return 0;   /* definitely not a glob */
     85 
     86     saved_char = wstr[wlen];
     87     wstr[wlen] = L'\0';
     88     h = FindFirstFileW(wstr, &data);
     89     wstr[wlen] = saved_char;
     90     if (h == INVALID_HANDLE_VALUE)
     91         return 0;   /* not a valid glob, just pass... */
     92 
     93     if (slash)
     94         udlen = WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
     95                                     NULL, 0, NULL, NULL);
     96     else
     97         udlen = 0;
     98 
     99     do {
    100         int uflen;
    101         char *arg;
    102 
    103         /*
    104          * skip over . and ..
    105          */
    106         if (data.cFileName[0] == L'.') {
    107             if ((data.cFileName[1] == L'\0') ||
    108                 (data.cFileName[1] == L'.' && data.cFileName[2] == L'\0'))
    109                 continue;
    110         }
    111 
    112         if (!validate_argv(newargc + 1))
    113             break;
    114 
    115         /*
    116          * -1 below means "scan for trailing '\0' *and* count it",
    117          * so that |uflen| covers even trailing '\0'.
    118          */
    119         uflen = WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
    120                                     NULL, 0, NULL, NULL);
    121 
    122         arg = malloc(udlen + uflen);
    123         if (arg == NULL)
    124             break;
    125 
    126         if (udlen)
    127             WideCharToMultiByte(CP_UTF8, 0, wstr, slash,
    128                                 arg, udlen, NULL, NULL);
    129 
    130         WideCharToMultiByte(CP_UTF8, 0, data.cFileName, -1,
    131                             arg + udlen, uflen, NULL, NULL);
    132 
    133         newargv[newargc++] = arg;
    134     } while (FindNextFileW(h, &data));
    135 
    136     CloseHandle(h);
    137 
    138     return 1;
    139 }
    140 
    141 void win32_utf8argv(int *argc, char **argv[])
    142 {
    143     const WCHAR *wcmdline;
    144     WCHAR *warg, *wend, *p;
    145     int wlen, ulen, valid = 1;
    146     char *arg;
    147 
    148     if (GetEnvironmentVariableW(L"OPENSSL_WIN32_UTF8", NULL, 0) == 0)
    149         return;
    150 
    151     newargc = 0;
    152     newargv = NULL;
    153     if (!validate_argv(newargc))
    154         return;
    155 
    156     wcmdline = GetCommandLineW();
    157     if (wcmdline == NULL) return;
    158 
    159     /*
    160      * make a copy of the command line, since we might have to modify it...
    161      */
    162     wlen = wcslen(wcmdline);
    163     p = _alloca((wlen + 1) * sizeof(WCHAR));
    164     wcscpy(p, wcmdline);
    165 
    166     while (*p != L'\0') {
    167         int in_quote = 0;
    168 
    169         if (*p == L' ' || *p == L'\t') {
    170             p++; /* skip over white spaces */
    171             continue;
    172         }
    173 
    174         /*
    175          * Note: because we may need to fiddle with the number of backslashes,
    176          * the argument string is copied into itself.  This is safe because
    177          * the number of characters will never expand.
    178          */
    179         warg = wend = p;
    180         while (*p != L'\0'
    181                && (in_quote || (*p != L' ' && *p != L'\t'))) {
    182             switch (*p) {
    183             case L'\\':
    184                 /*
    185                  * Microsoft documentation on how backslashes are treated
    186                  * is:
    187                  *
    188                  * + Backslashes are interpreted literally, unless they
    189                  *   immediately precede a double quotation mark.
    190                  * + If an even number of backslashes is followed by a double
    191                  *   quotation mark, one backslash is placed in the argv array
    192                  *   for every pair of backslashes, and the double quotation
    193                  *   mark is interpreted as a string delimiter.
    194                  * + If an odd number of backslashes is followed by a double
    195                  *   quotation mark, one backslash is placed in the argv array
    196                  *   for every pair of backslashes, and the double quotation
    197                  *   mark is "escaped" by the remaining backslash, causing a
    198                  *   literal double quotation mark (") to be placed in argv.
    199                  *
    200                  * Ref: https://msdn.microsoft.com/en-us/library/17w5ykft.aspx
    201                  *
    202                  * Though referred page doesn't mention it, multiple qouble
    203                  * quotes are also special. Pair of double quotes in quoted
    204                  * string is counted as single double quote.
    205                  */
    206                 {
    207                     const WCHAR *q = p;
    208                     int i;
    209 
    210                     while (*p == L'\\')
    211                         p++;
    212 
    213                     if (*p == L'"') {
    214                         int i;
    215 
    216                         for (i = (p - q) / 2; i > 0; i--)
    217                             *wend++ = L'\\';
    218 
    219                         /*
    220                          * if odd amount of backslashes before the quote,
    221                          * said quote is part of the argument, not a delimiter
    222                          */
    223                         if ((p - q) % 2 == 1)
    224                             *wend++ = *p++;
    225                     } else {
    226                         for (i = p - q; i > 0; i--)
    227                             *wend++ = L'\\';
    228                     }
    229                 }
    230                 break;
    231             case L'"':
    232                 /*
    233                  * Without the preceding backslash (or when preceded with an
    234                  * even number of backslashes), the double quote is a simple
    235                  * string delimiter and just slightly change the parsing state
    236                  */
    237                 if (in_quote && p[1] == L'"')
    238                     *wend++ = *p++;
    239                 else
    240                     in_quote = !in_quote;
    241                 p++;
    242                 break;
    243             default:
    244                 /*
    245                  * Any other non-delimiter character is just taken verbatim
    246                  */
    247                 *wend++ = *p++;
    248             }
    249         }
    250 
    251         wlen = wend - warg;
    252 
    253         if (wlen == 0 || !process_glob(warg, wlen)) {
    254             if (!validate_argv(newargc + 1)) {
    255                 valid = 0;
    256                 break;
    257             }
    258 
    259             ulen = 0;
    260             if (wlen > 0) {
    261                 ulen = WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
    262                                            NULL, 0, NULL, NULL);
    263                 if (ulen <= 0)
    264                     continue;
    265             }
    266 
    267             arg = malloc(ulen + 1);
    268             if (arg == NULL) {
    269                 valid = 0;
    270                 break;
    271             }
    272 
    273             if (wlen > 0)
    274                 WideCharToMultiByte(CP_UTF8, 0, warg, wlen,
    275                                     arg, ulen, NULL, NULL);
    276             arg[ulen] = '\0';
    277 
    278             newargv[newargc++] = arg;
    279         }
    280     }
    281 
    282     if (valid) {
    283         saved_cp = GetConsoleOutputCP();
    284         SetConsoleOutputCP(CP_UTF8);
    285 
    286         *argc = newargc;
    287         *argv = newargv;
    288 
    289         atexit(cleanup);
    290     } else if (newargv != NULL) {
    291         int i;
    292 
    293         for (i = 0; i < newargc; i++)
    294             free(newargv[i]);
    295 
    296         free(newargv);
    297 
    298         newargc = 0;
    299         newargv = NULL;
    300     }
    301 
    302     return;
    303 }
    304 #else
/*
 * Fallback for builds where CP_UTF8 is not defined: there is nothing to
 * convert, so the caller's |argc| and |argv| are left exactly as passed.
 */
void win32_utf8argv(int *argc, char **argv[])
{
    (void)argc;
    (void)argv;
}
    307 #endif
    308