Home | History | Annotate | Line # | Download | only in dmd
      1  1.1  mrg /**
      2  1.1  mrg  * Implements the lexical analyzer, which converts source code into lexical tokens.
      3  1.1  mrg  *
      4  1.1  mrg  * Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical)
      5  1.1  mrg  *
      6  1.1  mrg  * Copyright:   Copyright (C) 1999-2022 by The D Language Foundation, All Rights Reserved
      7  1.1  mrg  * Authors:     $(LINK2 https://www.digitalmars.com, Walter Bright)
      8  1.1  mrg  * License:     $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0)
      9  1.1  mrg  * Source:      $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d)
     10  1.1  mrg  * Documentation:  https://dlang.org/phobos/dmd_lexer.html
     11  1.1  mrg  * Coverage:    https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d
     12  1.1  mrg  */
     13  1.1  mrg 
     14  1.1  mrg module dmd.lexer;
     15  1.1  mrg 
     16  1.1  mrg import core.stdc.ctype;
     17  1.1  mrg import core.stdc.errno;
     18  1.1  mrg import core.stdc.stdarg;
     19  1.1  mrg import core.stdc.stdio;
     20  1.1  mrg import core.stdc.stdlib : getenv;
     21  1.1  mrg import core.stdc.string;
     22  1.1  mrg import core.stdc.time;
     23  1.1  mrg 
     24  1.1  mrg import dmd.entity;
     25  1.1  mrg import dmd.errors;
     26  1.1  mrg import dmd.globals;
     27  1.1  mrg import dmd.id;
     28  1.1  mrg import dmd.identifier;
     29  1.1  mrg import dmd.root.array;
     30  1.1  mrg import dmd.root.ctfloat;
     31  1.1  mrg import dmd.common.outbuffer;
     32  1.1  mrg import dmd.root.port;
     33  1.1  mrg import dmd.root.rmem;
     34  1.1  mrg import dmd.root.string;
     35  1.1  mrg import dmd.root.utf;
     36  1.1  mrg import dmd.tokens;
     37  1.1  mrg import dmd.utils;
     38  1.1  mrg 
     39  1.1  mrg nothrow:
     40  1.1  mrg 
     41  1.1  mrg version (DMDLIB)
     42  1.1  mrg {
     43  1.1  mrg     version = LocOffset;
     44  1.1  mrg }
     45  1.1  mrg 
     46  1.1  mrg /***********************************************************
     47  1.1  mrg  */
     48  1.1  mrg class Lexer
     49  1.1  mrg {
     50  1.1  mrg     private __gshared OutBuffer stringbuffer;
     51  1.1  mrg 
     52  1.1  mrg     Loc scanloc;            // for error messages
     53  1.1  mrg     Loc prevloc;            // location of token before current
     54  1.1  mrg 
     55  1.1  mrg     const(char)* p;         // current character
     56  1.1  mrg 
     57  1.1  mrg     Token token;
     58  1.1  mrg 
     59  1.1  mrg     // For ImportC
     60  1.1  mrg     bool Ccompile;              /// true if compiling ImportC
     61  1.1  mrg 
     62  1.1  mrg     // The following are valid only if (Ccompile == true)
     63  1.1  mrg     ubyte boolsize;             /// size of a C _Bool, default 1
     64  1.1  mrg     ubyte shortsize;            /// size of a C short, default 2
     65  1.1  mrg     ubyte intsize;              /// size of a C int, default 4
     66  1.1  mrg     ubyte longsize;             /// size of C long, 4 or 8
     67  1.1  mrg     ubyte long_longsize;        /// size of a C long long, default 8
     68  1.1  mrg     ubyte long_doublesize;      /// size of C long double, 8 or D real.sizeof
     69  1.1  mrg     ubyte wchar_tsize;          /// size of C wchar_t, 2 or 4
     70  1.1  mrg 
     71  1.1  mrg     private
     72  1.1  mrg     {
     73  1.1  mrg         const(char)* base;      // pointer to start of buffer
     74  1.1  mrg         const(char)* end;       // pointer to last element of buffer
     75  1.1  mrg         const(char)* line;      // start of current line
     76  1.1  mrg 
     77  1.1  mrg         bool doDocComment;      // collect doc comment information
     78  1.1  mrg         bool anyToken;          // seen at least one token
     79  1.1  mrg         bool commentToken;      // comments are TOK.comment's
     80  1.1  mrg         bool tokenizeNewlines;  // newlines are turned into TOK.endOfLine's
     81  1.1  mrg 
     82  1.1  mrg         version (DMDLIB)
     83  1.1  mrg         {
     84  1.1  mrg             bool whitespaceToken;   // tokenize whitespaces
     85  1.1  mrg         }
     86  1.1  mrg 
     87  1.1  mrg         int inTokenStringConstant; // can be larger than 1 when in nested q{} strings
     88  1.1  mrg         int lastDocLine;        // last line of previous doc comment
     89  1.1  mrg 
     90  1.1  mrg         Token* tokenFreelist;
     91  1.1  mrg     }
     92  1.1  mrg 
     93  1.1  mrg   nothrow:
     94  1.1  mrg 
     95  1.1  mrg     /*********************
     96  1.1  mrg      * Creates a Lexer for the source code base[begoffset..endoffset+1].
     97  1.1  mrg      * The last character, base[endoffset], must be null (0) or EOF (0x1A).
     98  1.1  mrg      *
     99  1.1  mrg      * Params:
    100  1.1  mrg      *  filename = used for error messages
    101  1.1  mrg      *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
    102  1.1  mrg      *  begoffset = starting offset into base[]
    103  1.1  mrg      *  endoffset = the last offset to read into base[]
    104  1.1  mrg      *  doDocComment = handle documentation comments
    105  1.1  mrg      *  commentToken = comments become TOK.comment's
    106  1.1  mrg      */
    107  1.1  mrg     this(const(char)* filename, const(char)* base, size_t begoffset,
    108  1.1  mrg         size_t endoffset, bool doDocComment, bool commentToken) pure
    109  1.1  mrg     {
    110  1.1  mrg         scanloc = Loc(filename, 1, 1);
    111  1.1  mrg         // debug printf("Lexer::Lexer(%p)\n", base);
    112  1.1  mrg         // debug printf("lexer.filename = %s\n", filename);
    113  1.1  mrg         token = Token.init;
    114  1.1  mrg         this.base = base;
    115  1.1  mrg         this.end = base + endoffset;
    116  1.1  mrg         p = base + begoffset;
    117  1.1  mrg         line = p;
    118  1.1  mrg         this.doDocComment = doDocComment;
    119  1.1  mrg         this.commentToken = commentToken;
    120  1.1  mrg         this.tokenizeNewlines = false;
    121  1.1  mrg         this.inTokenStringConstant = 0;
    122  1.1  mrg         this.lastDocLine = 0;
    123  1.1  mrg         //initKeywords();
    124  1.1  mrg         /* If first line starts with '#!', ignore the line
    125  1.1  mrg          */
    126  1.1  mrg         if (p && p[0] == '#' && p[1] == '!')
    127  1.1  mrg         {
    128  1.1  mrg             p += 2;
    129  1.1  mrg             while (1)
    130  1.1  mrg             {
    131  1.1  mrg                 char c = *p++;
    132  1.1  mrg                 switch (c)
    133  1.1  mrg                 {
    134  1.1  mrg                 case 0:
    135  1.1  mrg                 case 0x1A:
    136  1.1  mrg                     p--;
    137  1.1  mrg                     goto case;
    138  1.1  mrg                 case '\n':
    139  1.1  mrg                     break;
    140  1.1  mrg                 default:
    141  1.1  mrg                     continue;
    142  1.1  mrg                 }
    143  1.1  mrg                 break;
    144  1.1  mrg             }
    145  1.1  mrg             endOfLine();
    146  1.1  mrg         }
    147  1.1  mrg     }
    148  1.1  mrg 
    149  1.1  mrg     version (DMDLIB)
    150  1.1  mrg     {
    151  1.1  mrg         this(const(char)* filename, const(char)* base, size_t begoffset, size_t endoffset,
    152  1.1  mrg             bool doDocComment, bool commentToken, bool whitespaceToken)
    153  1.1  mrg         {
    154  1.1  mrg             this(filename, base, begoffset, endoffset, doDocComment, commentToken);
    155  1.1  mrg             this.whitespaceToken = whitespaceToken;
    156  1.1  mrg         }
    157  1.1  mrg 
    158  1.1  mrg         bool empty() const pure @property @nogc @safe
    159  1.1  mrg         {
    160  1.1  mrg             return front() == TOK.endOfFile;
    161  1.1  mrg         }
    162  1.1  mrg 
    163  1.1  mrg         TOK front() const pure @property @nogc @safe
    164  1.1  mrg         {
    165  1.1  mrg             return token.value;
    166  1.1  mrg         }
    167  1.1  mrg 
    168  1.1  mrg         void popFront()
    169  1.1  mrg         {
    170  1.1  mrg             nextToken();
    171  1.1  mrg         }
    172  1.1  mrg     }
    173  1.1  mrg 
    174  1.1  mrg     /// Returns: a newly allocated `Token`.
    175  1.1  mrg     Token* allocateToken() pure nothrow @safe
    176  1.1  mrg     {
    177  1.1  mrg         if (tokenFreelist)
    178  1.1  mrg         {
    179  1.1  mrg             Token* t = tokenFreelist;
    180  1.1  mrg             tokenFreelist = t.next;
    181  1.1  mrg             t.next = null;
    182  1.1  mrg             return t;
    183  1.1  mrg         }
    184  1.1  mrg         return new Token();
    185  1.1  mrg     }
    186  1.1  mrg 
    187  1.1  mrg     /// Frees the given token by returning it to the freelist.
    188  1.1  mrg     private void releaseToken(Token* token) pure nothrow @nogc @safe
    189  1.1  mrg     {
    190  1.1  mrg         if (mem.isGCEnabled)
    191  1.1  mrg             *token = Token.init;
    192  1.1  mrg         token.next = tokenFreelist;
    193  1.1  mrg         tokenFreelist = token;
    194  1.1  mrg     }
    195  1.1  mrg 
    196  1.1  mrg     final TOK nextToken()
    197  1.1  mrg     {
    198  1.1  mrg         prevloc = token.loc;
    199  1.1  mrg         if (token.next)
    200  1.1  mrg         {
    201  1.1  mrg             Token* t = token.next;
    202  1.1  mrg             memcpy(&token, t, Token.sizeof);
    203  1.1  mrg             releaseToken(t);
    204  1.1  mrg         }
    205  1.1  mrg         else
    206  1.1  mrg         {
    207  1.1  mrg             scan(&token);
    208  1.1  mrg         }
    209  1.1  mrg         //printf(token.toChars());
    210  1.1  mrg         return token.value;
    211  1.1  mrg     }
    212  1.1  mrg 
    213  1.1  mrg     /***********************
    214  1.1  mrg      * Look ahead at next token's value.
    215  1.1  mrg      */
    216  1.1  mrg     final TOK peekNext()
    217  1.1  mrg     {
    218  1.1  mrg         return peek(&token).value;
    219  1.1  mrg     }
    220  1.1  mrg 
    221  1.1  mrg     /***********************
    222  1.1  mrg      * Look 2 tokens ahead at value.
    223  1.1  mrg      */
    224  1.1  mrg     final TOK peekNext2()
    225  1.1  mrg     {
    226  1.1  mrg         Token* t = peek(&token);
    227  1.1  mrg         return peek(t).value;
    228  1.1  mrg     }
    229  1.1  mrg 
    230  1.1  mrg     /****************************
    231  1.1  mrg      * Turn next token in buffer into a token.
    232  1.1  mrg      * Params:
    233  1.1  mrg      *  t = the token to set the resulting Token to
    234  1.1  mrg      */
    235  1.1  mrg     final void scan(Token* t)
    236  1.1  mrg     {
    237  1.1  mrg         const lastLine = scanloc.linnum;
    238  1.1  mrg         Loc startLoc;
    239  1.1  mrg         t.blockComment = null;
    240  1.1  mrg         t.lineComment = null;
    241  1.1  mrg 
    242  1.1  mrg         while (1)
    243  1.1  mrg         {
    244  1.1  mrg             t.ptr = p;
    245  1.1  mrg             //printf("p = %p, *p = '%c'\n",p,*p);
    246  1.1  mrg             t.loc = loc();
    247  1.1  mrg             switch (*p)
    248  1.1  mrg             {
    249  1.1  mrg             case 0:
    250  1.1  mrg             case 0x1A:
    251  1.1  mrg                 t.value = TOK.endOfFile; // end of file
    252  1.1  mrg                 // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile.
    253  1.1  mrg                 return;
    254  1.1  mrg             case ' ':
    255  1.1  mrg                 // Skip 4 spaces at a time after aligning 'p' to a 4-byte boundary.
    256  1.1  mrg                 while ((cast(size_t)p) % uint.sizeof)
    257  1.1  mrg                 {
    258  1.1  mrg                     if (*p != ' ')
    259  1.1  mrg                         goto LendSkipFourSpaces;
    260  1.1  mrg                     p++;
    261  1.1  mrg                 }
    262  1.1  mrg                 while (*(cast(uint*)p) == 0x20202020) // ' ' == 0x20
    263  1.1  mrg                     p += 4;
    264  1.1  mrg                 // Skip over any remaining space on the line.
    265  1.1  mrg                 while (*p == ' ')
    266  1.1  mrg                     p++;
    267  1.1  mrg             LendSkipFourSpaces:
    268  1.1  mrg                 version (DMDLIB)
    269  1.1  mrg                 {
    270  1.1  mrg                     if (whitespaceToken)
    271  1.1  mrg                     {
    272  1.1  mrg                         t.value = TOK.whitespace;
    273  1.1  mrg                         return;
    274  1.1  mrg                     }
    275  1.1  mrg                 }
    276  1.1  mrg                 continue; // skip white space
    277  1.1  mrg             case '\t':
    278  1.1  mrg             case '\v':
    279  1.1  mrg             case '\f':
    280  1.1  mrg                 p++;
    281  1.1  mrg                 version (DMDLIB)
    282  1.1  mrg                 {
    283  1.1  mrg                     if (whitespaceToken)
    284  1.1  mrg                     {
    285  1.1  mrg                         t.value = TOK.whitespace;
    286  1.1  mrg                         return;
    287  1.1  mrg                     }
    288  1.1  mrg                 }
    289  1.1  mrg                 continue; // skip white space
    290  1.1  mrg             case '\r':
    291  1.1  mrg                 p++;
    292  1.1  mrg                 if (*p != '\n') // if CR stands by itself
    293  1.1  mrg                 {
    294  1.1  mrg                     endOfLine();
    295  1.1  mrg                     if (tokenizeNewlines)
    296  1.1  mrg                     {
    297  1.1  mrg                         t.value = TOK.endOfLine;
    298  1.1  mrg                         tokenizeNewlines = false;
    299  1.1  mrg                         return;
    300  1.1  mrg                     }
    301  1.1  mrg                 }
    302  1.1  mrg                 version (DMDLIB)
    303  1.1  mrg                 {
    304  1.1  mrg                     if (whitespaceToken)
    305  1.1  mrg                     {
    306  1.1  mrg                         t.value = TOK.whitespace;
    307  1.1  mrg                         return;
    308  1.1  mrg                     }
    309  1.1  mrg                 }
    310  1.1  mrg                 continue; // skip white space
    311  1.1  mrg             case '\n':
    312  1.1  mrg                 p++;
    313  1.1  mrg                 endOfLine();
    314  1.1  mrg                 if (tokenizeNewlines)
    315  1.1  mrg                 {
    316  1.1  mrg                     t.value = TOK.endOfLine;
    317  1.1  mrg                     tokenizeNewlines = false;
    318  1.1  mrg                     return;
    319  1.1  mrg                 }
    320  1.1  mrg                 version (DMDLIB)
    321  1.1  mrg                 {
    322  1.1  mrg                     if (whitespaceToken)
    323  1.1  mrg                     {
    324  1.1  mrg                         t.value = TOK.whitespace;
    325  1.1  mrg                         return;
    326  1.1  mrg                     }
    327  1.1  mrg                 }
    328  1.1  mrg                 continue; // skip white space
    329  1.1  mrg             case '0':
    330  1.1  mrg                 if (!isZeroSecond(p[1]))        // if numeric literal does not continue
    331  1.1  mrg                 {
    332  1.1  mrg                     ++p;
    333  1.1  mrg                     t.unsvalue = 0;
    334  1.1  mrg                     t.value = TOK.int32Literal;
    335  1.1  mrg                     return;
    336  1.1  mrg                 }
    337  1.1  mrg                 goto Lnumber;
    338  1.1  mrg 
    339  1.1  mrg             case '1': .. case '9':
    340  1.1  mrg                 if (!isDigitSecond(p[1]))       // if numeric literal does not continue
    341  1.1  mrg                 {
    342  1.1  mrg                     t.unsvalue = *p - '0';
    343  1.1  mrg                     ++p;
    344  1.1  mrg                     t.value = TOK.int32Literal;
    345  1.1  mrg                     return;
    346  1.1  mrg                 }
    347  1.1  mrg             Lnumber:
    348  1.1  mrg                 t.value = number(t);
    349  1.1  mrg                 return;
    350  1.1  mrg 
    351  1.1  mrg             case '\'':
    352  1.1  mrg                 if (issinglechar(p[1]) && p[2] == '\'')
    353  1.1  mrg                 {
    354  1.1  mrg                     t.unsvalue = p[1];        // simple one character literal
    355  1.1  mrg                     t.value = TOK.charLiteral;
    356  1.1  mrg                     p += 3;
    357  1.1  mrg                 }
    358  1.1  mrg                 else if (Ccompile)
    359  1.1  mrg                 {
    360  1.1  mrg                     clexerCharConstant(*t, 0);
    361  1.1  mrg                 }
    362  1.1  mrg                 else
    363  1.1  mrg                 {
    364  1.1  mrg                     t.value = charConstant(t);
    365  1.1  mrg                 }
    366  1.1  mrg                 return;
    367  1.1  mrg 
    368  1.1  mrg             case 'u':
    369  1.1  mrg             case 'U':
    370  1.1  mrg             case 'L':
    371  1.1  mrg                 if (!Ccompile)
    372  1.1  mrg                     goto case_ident;
    373  1.1  mrg                 if (p[1] == '\'')       // C wide character constant
    374  1.1  mrg                 {
    375  1.1  mrg                     char c = *p;
    376  1.1  mrg                     if (c == 'L')       // convert L to u or U
    377  1.1  mrg                         c = (wchar_tsize == 4) ? 'u' : 'U';
    378  1.1  mrg                     ++p;
    379  1.1  mrg                     clexerCharConstant(*t, c);
    380  1.1  mrg                     return;
    381  1.1  mrg                 }
    382  1.1  mrg                 else if (p[1] == '\"')  // C wide string literal
    383  1.1  mrg                 {
    384  1.1  mrg                     const c = *p;
    385  1.1  mrg                     ++p;
    386  1.1  mrg                     escapeStringConstant(t);
    387  1.1  mrg                     t.postfix = c == 'L' ? (wchar_tsize == 2 ? 'w' : 'd') :
    388  1.1  mrg                                 c == 'u' ? 'w' :
    389  1.1  mrg                                 'd';
    390  1.1  mrg                     return;
    391  1.1  mrg                 }
    392  1.1  mrg                 else if (p[1] == '8' && p[2] == '\"') // C UTF-8 string literal
    393  1.1  mrg                 {
    394  1.1  mrg                     p += 2;
    395  1.1  mrg                     escapeStringConstant(t);
    396  1.1  mrg                     return;
    397  1.1  mrg                 }
    398  1.1  mrg                 goto case_ident;
    399  1.1  mrg 
    400  1.1  mrg             case 'r':
    401  1.1  mrg                 if (Ccompile || p[1] != '"')
    402  1.1  mrg                     goto case_ident;
    403  1.1  mrg                 p++;
    404  1.1  mrg                 goto case '`';
    405  1.1  mrg             case '`':
    406  1.1  mrg                 if (Ccompile)
    407  1.1  mrg                     goto default;
    408  1.1  mrg                 wysiwygStringConstant(t);
    409  1.1  mrg                 return;
    410  1.1  mrg             case 'q':
    411  1.1  mrg                 if (Ccompile)
    412  1.1  mrg                     goto case_ident;
    413  1.1  mrg                 if (p[1] == '"')
    414  1.1  mrg                 {
    415  1.1  mrg                     p++;
    416  1.1  mrg                     delimitedStringConstant(t);
    417  1.1  mrg                     return;
    418  1.1  mrg                 }
    419  1.1  mrg                 else if (p[1] == '{')
    420  1.1  mrg                 {
    421  1.1  mrg                     p++;
    422  1.1  mrg                     tokenStringConstant(t);
    423  1.1  mrg                     return;
    424  1.1  mrg                 }
    425  1.1  mrg                 else
    426  1.1  mrg                     goto case_ident;
    427  1.1  mrg             case '"':
    428  1.1  mrg                 escapeStringConstant(t);
    429  1.1  mrg                 return;
    430  1.1  mrg             case 'a':
    431  1.1  mrg             case 'b':
    432  1.1  mrg             case 'c':
    433  1.1  mrg             case 'd':
    434  1.1  mrg             case 'e':
    435  1.1  mrg             case 'f':
    436  1.1  mrg             case 'g':
    437  1.1  mrg             case 'h':
    438  1.1  mrg             case 'i':
    439  1.1  mrg             case 'j':
    440  1.1  mrg             case 'k':
    441  1.1  mrg             case 'l':
    442  1.1  mrg             case 'm':
    443  1.1  mrg             case 'n':
    444  1.1  mrg             case 'o':
    445  1.1  mrg             case 'p':
    446  1.1  mrg                 /*case 'q': case 'r':*/
    447  1.1  mrg             case 's':
    448  1.1  mrg             case 't':
    449  1.1  mrg             //case 'u':
    450  1.1  mrg             case 'v':
    451  1.1  mrg             case 'w':
    452  1.1  mrg             case 'x':
    453  1.1  mrg             case 'y':
    454  1.1  mrg             case 'z':
    455  1.1  mrg             case 'A':
    456  1.1  mrg             case 'B':
    457  1.1  mrg             case 'C':
    458  1.1  mrg             case 'D':
    459  1.1  mrg             case 'E':
    460  1.1  mrg             case 'F':
    461  1.1  mrg             case 'G':
    462  1.1  mrg             case 'H':
    463  1.1  mrg             case 'I':
    464  1.1  mrg             case 'J':
    465  1.1  mrg             case 'K':
    466  1.1  mrg             //case 'L':
    467  1.1  mrg             case 'M':
    468  1.1  mrg             case 'N':
    469  1.1  mrg             case 'O':
    470  1.1  mrg             case 'P':
    471  1.1  mrg             case 'Q':
    472  1.1  mrg             case 'R':
    473  1.1  mrg             case 'S':
    474  1.1  mrg             case 'T':
    475  1.1  mrg             //case 'U':
    476  1.1  mrg             case 'V':
    477  1.1  mrg             case 'W':
    478  1.1  mrg             case 'X':
    479  1.1  mrg             case 'Y':
    480  1.1  mrg             case 'Z':
    481  1.1  mrg             case '_':
    482  1.1  mrg             case_ident:
    483  1.1  mrg                 {
    484  1.1  mrg                     while (1)
    485  1.1  mrg                     {
    486  1.1  mrg                         const c = *++p;
    487  1.1  mrg                         if (isidchar(c))
    488  1.1  mrg                             continue;
    489  1.1  mrg                         else if (c & 0x80)
    490  1.1  mrg                         {
    491  1.1  mrg                             const s = p;
    492  1.1  mrg                             const u = decodeUTF();
    493  1.1  mrg                             if (isUniAlpha(u))
    494  1.1  mrg                                 continue;
    495  1.1  mrg                             error("char 0x%04x not allowed in identifier", u);
    496  1.1  mrg                             p = s;
    497  1.1  mrg                         }
    498  1.1  mrg                         break;
    499  1.1  mrg                     }
    500  1.1  mrg                     Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr));
    501  1.1  mrg                     t.ident = id;
    502  1.1  mrg                     t.value = cast(TOK)id.getValue();
    503  1.1  mrg 
    504  1.1  mrg                     anyToken = 1;
    505  1.1  mrg 
    506  1.1  mrg                     /* Different keywords for C and D
    507  1.1  mrg                      */
    508  1.1  mrg                     if (Ccompile)
    509  1.1  mrg                     {
    510  1.1  mrg                         if (t.value != TOK.identifier)
    511  1.1  mrg                         {
    512  1.1  mrg                             t.value = Ckeywords[t.value];  // filter out D keywords
    513  1.1  mrg                         }
    514  1.1  mrg                     }
    515  1.1  mrg                     else if (t.value >= FirstCKeyword)
    516  1.1  mrg                         t.value = TOK.identifier;       // filter out C keywords
    517  1.1  mrg 
    518  1.1  mrg                     else if (*t.ptr == '_') // if special identifier token
    519  1.1  mrg                     {
    520  1.1  mrg                         // Lazy initialization
    521  1.1  mrg                         TimeStampInfo.initialize(t.loc);
    522  1.1  mrg 
    523  1.1  mrg                         if (id == Id.DATE)
    524  1.1  mrg                         {
    525  1.1  mrg                             t.ustring = TimeStampInfo.date.ptr;
    526  1.1  mrg                             goto Lstr;
    527  1.1  mrg                         }
    528  1.1  mrg                         else if (id == Id.TIME)
    529  1.1  mrg                         {
    530  1.1  mrg                             t.ustring = TimeStampInfo.time.ptr;
    531  1.1  mrg                             goto Lstr;
    532  1.1  mrg                         }
    533  1.1  mrg                         else if (id == Id.VENDOR)
    534  1.1  mrg                         {
    535  1.1  mrg                             t.ustring = global.vendor.xarraydup.ptr;
    536  1.1  mrg                             goto Lstr;
    537  1.1  mrg                         }
    538  1.1  mrg                         else if (id == Id.TIMESTAMP)
    539  1.1  mrg                         {
    540  1.1  mrg                             t.ustring = TimeStampInfo.timestamp.ptr;
    541  1.1  mrg                         Lstr:
    542  1.1  mrg                             t.value = TOK.string_;
    543  1.1  mrg                             t.postfix = 0;
    544  1.1  mrg                             t.len = cast(uint)strlen(t.ustring);
    545  1.1  mrg                         }
    546  1.1  mrg                         else if (id == Id.VERSIONX)
    547  1.1  mrg                         {
    548  1.1  mrg                             t.value = TOK.int64Literal;
    549  1.1  mrg                             t.unsvalue = global.versionNumber();
    550  1.1  mrg                         }
    551  1.1  mrg                         else if (id == Id.EOFX)
    552  1.1  mrg                         {
    553  1.1  mrg                             t.value = TOK.endOfFile;
    554  1.1  mrg                             // Advance scanner to end of file
    555  1.1  mrg                             while (!(*p == 0 || *p == 0x1A))
    556  1.1  mrg                                 p++;
    557  1.1  mrg                         }
    558  1.1  mrg                     }
    559  1.1  mrg                     //printf("t.value = %d\n",t.value);
    560  1.1  mrg                     return;
    561  1.1  mrg                 }
    562  1.1  mrg             case '/':
    563  1.1  mrg                 p++;
    564  1.1  mrg                 switch (*p)
    565  1.1  mrg                 {
    566  1.1  mrg                 case '=':
    567  1.1  mrg                     p++;
    568  1.1  mrg                     t.value = TOK.divAssign;
    569  1.1  mrg                     return;
    570  1.1  mrg                 case '*':
    571  1.1  mrg                     p++;
    572  1.1  mrg                     startLoc = loc();
    573  1.1  mrg                     while (1)
    574  1.1  mrg                     {
    575  1.1  mrg                         while (1)
    576  1.1  mrg                         {
    577  1.1  mrg                             const c = *p;
    578  1.1  mrg                             switch (c)
    579  1.1  mrg                             {
    580  1.1  mrg                             case '/':
    581  1.1  mrg                                 break;
    582  1.1  mrg                             case '\n':
    583  1.1  mrg                                 endOfLine();
    584  1.1  mrg                                 p++;
    585  1.1  mrg                                 continue;
    586  1.1  mrg                             case '\r':
    587  1.1  mrg                                 p++;
    588  1.1  mrg                                 if (*p != '\n')
    589  1.1  mrg                                     endOfLine();
    590  1.1  mrg                                 continue;
    591  1.1  mrg                             case 0:
    592  1.1  mrg                             case 0x1A:
    593  1.1  mrg                                 error("unterminated /* */ comment");
    594  1.1  mrg                                 p = end;
    595  1.1  mrg                                 t.loc = loc();
    596  1.1  mrg                                 t.value = TOK.endOfFile;
    597  1.1  mrg                                 return;
    598  1.1  mrg                             default:
    599  1.1  mrg                                 if (c & 0x80)
    600  1.1  mrg                                 {
    601  1.1  mrg                                     const u = decodeUTF();
    602  1.1  mrg                                     if (u == PS || u == LS)
    603  1.1  mrg                                         endOfLine();
    604  1.1  mrg                                 }
    605  1.1  mrg                                 p++;
    606  1.1  mrg                                 continue;
    607  1.1  mrg                             }
    608  1.1  mrg                             break;
    609  1.1  mrg                         }
    610  1.1  mrg                         p++;
    611  1.1  mrg                         if (p[-2] == '*' && p - 3 != t.ptr)
    612  1.1  mrg                             break;
    613  1.1  mrg                     }
    614  1.1  mrg                     if (commentToken)
    615  1.1  mrg                     {
    616  1.1  mrg                         t.loc = startLoc;
    617  1.1  mrg                         t.value = TOK.comment;
    618  1.1  mrg                         return;
    619  1.1  mrg                     }
    620  1.1  mrg                     else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
    621  1.1  mrg                     {
    622  1.1  mrg                         // if /** but not /**/
    623  1.1  mrg                         getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
    624  1.1  mrg                         lastDocLine = scanloc.linnum;
    625  1.1  mrg                     }
    626  1.1  mrg                     continue;
    627  1.1  mrg                 case '/': // do // style comments
    628  1.1  mrg                     startLoc = loc();
    629  1.1  mrg                     while (1)
    630  1.1  mrg                     {
    631  1.1  mrg                         const c = *++p;
    632  1.1  mrg                         switch (c)
    633  1.1  mrg                         {
    634  1.1  mrg                         case '\n':
    635  1.1  mrg                             break;
    636  1.1  mrg                         case '\r':
    637  1.1  mrg                             if (p[1] == '\n')
    638  1.1  mrg                                 p++;
    639  1.1  mrg                             break;
    640  1.1  mrg                         case 0:
    641  1.1  mrg                         case 0x1A:
    642  1.1  mrg                             if (commentToken)
    643  1.1  mrg                             {
    644  1.1  mrg                                 p = end;
    645  1.1  mrg                                 t.loc = startLoc;
    646  1.1  mrg                                 t.value = TOK.comment;
    647  1.1  mrg                                 return;
    648  1.1  mrg                             }
    649  1.1  mrg                             if (doDocComment && t.ptr[2] == '/')
    650  1.1  mrg                             {
    651  1.1  mrg                                 getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
    652  1.1  mrg                                 lastDocLine = scanloc.linnum;
    653  1.1  mrg                             }
    654  1.1  mrg                             p = end;
    655  1.1  mrg                             t.loc = loc();
    656  1.1  mrg                             t.value = TOK.endOfFile;
    657  1.1  mrg                             return;
    658  1.1  mrg                         default:
    659  1.1  mrg                             if (c & 0x80)
    660  1.1  mrg                             {
    661  1.1  mrg                                 const u = decodeUTF();
    662  1.1  mrg                                 if (u == PS || u == LS)
    663  1.1  mrg                                     break;
    664  1.1  mrg                             }
    665  1.1  mrg                             continue;
    666  1.1  mrg                         }
    667  1.1  mrg                         break;
    668  1.1  mrg                     }
    669  1.1  mrg                     if (commentToken)
    670  1.1  mrg                     {
    671  1.1  mrg                         version (DMDLIB) {}
    672  1.1  mrg                         else
    673  1.1  mrg                         {
    674  1.1  mrg                             p++;
    675  1.1  mrg                             endOfLine();
    676  1.1  mrg                         }
    677  1.1  mrg                         t.loc = startLoc;
    678  1.1  mrg                         t.value = TOK.comment;
    679  1.1  mrg                         return;
    680  1.1  mrg                     }
    681  1.1  mrg                     if (doDocComment && t.ptr[2] == '/')
    682  1.1  mrg                     {
    683  1.1  mrg                         getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
    684  1.1  mrg                         lastDocLine = scanloc.linnum;
    685  1.1  mrg                     }
    686  1.1  mrg                     p++;
    687  1.1  mrg                     endOfLine();
    688  1.1  mrg                     continue;
    689  1.1  mrg                 case '+':
    690  1.1  mrg                     if (!Ccompile)
    691  1.1  mrg                     {
    692  1.1  mrg                         int nest;
    693  1.1  mrg                         startLoc = loc();
    694  1.1  mrg                         p++;
    695  1.1  mrg                         nest = 1;
    696  1.1  mrg                         while (1)
    697  1.1  mrg                         {
    698  1.1  mrg                             char c = *p;
    699  1.1  mrg                             switch (c)
    700  1.1  mrg                             {
    701  1.1  mrg                             case '/':
    702  1.1  mrg                                 p++;
    703  1.1  mrg                                 if (*p == '+')
    704  1.1  mrg                                 {
    705  1.1  mrg                                     p++;
    706  1.1  mrg                                     nest++;
    707  1.1  mrg                                 }
    708  1.1  mrg                                 continue;
    709  1.1  mrg                             case '+':
    710  1.1  mrg                                 p++;
    711  1.1  mrg                                 if (*p == '/')
    712  1.1  mrg                                 {
    713  1.1  mrg                                     p++;
    714  1.1  mrg                                     if (--nest == 0)
    715  1.1  mrg                                         break;
    716  1.1  mrg                                 }
    717  1.1  mrg                                 continue;
    718  1.1  mrg                             case '\r':
    719  1.1  mrg                                 p++;
    720  1.1  mrg                                 if (*p != '\n')
    721  1.1  mrg                                     endOfLine();
    722  1.1  mrg                                 continue;
    723  1.1  mrg                             case '\n':
    724  1.1  mrg                                 endOfLine();
    725  1.1  mrg                                 p++;
    726  1.1  mrg                                 continue;
    727  1.1  mrg                             case 0:
    728  1.1  mrg                             case 0x1A:
    729  1.1  mrg                                 error("unterminated /+ +/ comment");
    730  1.1  mrg                                 p = end;
    731  1.1  mrg                                 t.loc = loc();
    732  1.1  mrg                                 t.value = TOK.endOfFile;
    733  1.1  mrg                                 return;
    734  1.1  mrg                             default:
    735  1.1  mrg                                 if (c & 0x80)
    736  1.1  mrg                                 {
    737  1.1  mrg                                     uint u = decodeUTF();
    738  1.1  mrg                                     if (u == PS || u == LS)
    739  1.1  mrg                                         endOfLine();
    740  1.1  mrg                                 }
    741  1.1  mrg                                 p++;
    742  1.1  mrg                                 continue;
    743  1.1  mrg                             }
    744  1.1  mrg                             break;
    745  1.1  mrg                         }
    746  1.1  mrg                         if (commentToken)
    747  1.1  mrg                         {
    748  1.1  mrg                             t.loc = startLoc;
    749  1.1  mrg                             t.value = TOK.comment;
    750  1.1  mrg                             return;
    751  1.1  mrg                         }
    752  1.1  mrg                         if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
    753  1.1  mrg                         {
    754  1.1  mrg                             // if /++ but not /++/
    755  1.1  mrg                             getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
    756  1.1  mrg                             lastDocLine = scanloc.linnum;
    757  1.1  mrg                         }
    758  1.1  mrg                         continue;
    759  1.1  mrg                     }
    760  1.1  mrg                     break;
    761  1.1  mrg                 default:
    762  1.1  mrg                     break;
    763  1.1  mrg                 }
    764  1.1  mrg                 t.value = TOK.div;
    765  1.1  mrg                 return;
    766  1.1  mrg             case '.':
    767  1.1  mrg                 p++;
    768  1.1  mrg                 if (isdigit(*p))
    769  1.1  mrg                 {
    770  1.1  mrg                     /* Note that we don't allow ._1 and ._ as being
    771  1.1  mrg                      * valid floating point numbers.
    772  1.1  mrg                      */
    773  1.1  mrg                     p--;
    774  1.1  mrg                     t.value = inreal(t);
    775  1.1  mrg                 }
    776  1.1  mrg                 else if (p[0] == '.')
    777  1.1  mrg                 {
    778  1.1  mrg                     if (p[1] == '.')
    779  1.1  mrg                     {
    780  1.1  mrg                         p += 2;
    781  1.1  mrg                         t.value = TOK.dotDotDot;
    782  1.1  mrg                     }
    783  1.1  mrg                     else
    784  1.1  mrg                     {
    785  1.1  mrg                         p++;
    786  1.1  mrg                         t.value = TOK.slice;
    787  1.1  mrg                     }
    788  1.1  mrg                 }
    789  1.1  mrg                 else
    790  1.1  mrg                     t.value = TOK.dot;
    791  1.1  mrg                 return;
    792  1.1  mrg             case '&':
    793  1.1  mrg                 p++;
    794  1.1  mrg                 if (*p == '=')
    795  1.1  mrg                 {
    796  1.1  mrg                     p++;
    797  1.1  mrg                     t.value = TOK.andAssign;
    798  1.1  mrg                 }
    799  1.1  mrg                 else if (*p == '&')
    800  1.1  mrg                 {
    801  1.1  mrg                     p++;
    802  1.1  mrg                     t.value = TOK.andAnd;
    803  1.1  mrg                 }
    804  1.1  mrg                 else
    805  1.1  mrg                     t.value = TOK.and;
    806  1.1  mrg                 return;
    807  1.1  mrg             case '|':
    808  1.1  mrg                 p++;
    809  1.1  mrg                 if (*p == '=')
    810  1.1  mrg                 {
    811  1.1  mrg                     p++;
    812  1.1  mrg                     t.value = TOK.orAssign;
    813  1.1  mrg                 }
    814  1.1  mrg                 else if (*p == '|')
    815  1.1  mrg                 {
    816  1.1  mrg                     p++;
    817  1.1  mrg                     t.value = TOK.orOr;
    818  1.1  mrg                 }
    819  1.1  mrg                 else
    820  1.1  mrg                     t.value = TOK.or;
    821  1.1  mrg                 return;
    822  1.1  mrg             case '-':
    823  1.1  mrg                 p++;
    824  1.1  mrg                 if (*p == '=')
    825  1.1  mrg                 {
    826  1.1  mrg                     p++;
    827  1.1  mrg                     t.value = TOK.minAssign;
    828  1.1  mrg                 }
    829  1.1  mrg                 else if (*p == '-')
    830  1.1  mrg                 {
    831  1.1  mrg                     p++;
    832  1.1  mrg                     t.value = TOK.minusMinus;
    833  1.1  mrg                 }
    834  1.1  mrg                 else if (*p == '>')
    835  1.1  mrg                 {
    836  1.1  mrg                     ++p;
    837  1.1  mrg                     t.value = TOK.arrow;
    838  1.1  mrg                 }
    839  1.1  mrg                 else
    840  1.1  mrg                     t.value = TOK.min;
    841  1.1  mrg                 return;
    842  1.1  mrg             case '+':
    843  1.1  mrg                 p++;
    844  1.1  mrg                 if (*p == '=')
    845  1.1  mrg                 {
    846  1.1  mrg                     p++;
    847  1.1  mrg                     t.value = TOK.addAssign;
    848  1.1  mrg                 }
    849  1.1  mrg                 else if (*p == '+')
    850  1.1  mrg                 {
    851  1.1  mrg                     p++;
    852  1.1  mrg                     t.value = TOK.plusPlus;
    853  1.1  mrg                 }
    854  1.1  mrg                 else
    855  1.1  mrg                     t.value = TOK.add;
    856  1.1  mrg                 return;
    857  1.1  mrg             case '<':
    858  1.1  mrg                 p++;
    859  1.1  mrg                 if (*p == '=')
    860  1.1  mrg                 {
    861  1.1  mrg                     p++;
    862  1.1  mrg                     t.value = TOK.lessOrEqual; // <=
    863  1.1  mrg                 }
    864  1.1  mrg                 else if (*p == '<')
    865  1.1  mrg                 {
    866  1.1  mrg                     p++;
    867  1.1  mrg                     if (*p == '=')
    868  1.1  mrg                     {
    869  1.1  mrg                         p++;
    870  1.1  mrg                         t.value = TOK.leftShiftAssign; // <<=
    871  1.1  mrg                     }
    872  1.1  mrg                     else
    873  1.1  mrg                         t.value = TOK.leftShift; // <<
    874  1.1  mrg                 }
    875  1.1  mrg                 else if (*p == ':' && Ccompile)
    876  1.1  mrg                 {
    877  1.1  mrg                     ++p;
    878  1.1  mrg                     t.value = TOK.leftBracket;  // <:
    879  1.1  mrg                 }
    880  1.1  mrg                 else if (*p == '%' && Ccompile)
    881  1.1  mrg                 {
    882  1.1  mrg                     ++p;
    883  1.1  mrg                     t.value = TOK.leftCurly;    // <%
    884  1.1  mrg                 }
    885  1.1  mrg                 else
    886  1.1  mrg                     t.value = TOK.lessThan; // <
    887  1.1  mrg                 return;
    888  1.1  mrg             case '>':
    889  1.1  mrg                 p++;
    890  1.1  mrg                 if (*p == '=')
    891  1.1  mrg                 {
    892  1.1  mrg                     p++;
    893  1.1  mrg                     t.value = TOK.greaterOrEqual; // >=
    894  1.1  mrg                 }
    895  1.1  mrg                 else if (*p == '>')
    896  1.1  mrg                 {
    897  1.1  mrg                     p++;
    898  1.1  mrg                     if (*p == '=')
    899  1.1  mrg                     {
    900  1.1  mrg                         p++;
    901  1.1  mrg                         t.value = TOK.rightShiftAssign; // >>=
    902  1.1  mrg                     }
    903  1.1  mrg                     else if (*p == '>')
    904  1.1  mrg                     {
    905  1.1  mrg                         p++;
    906  1.1  mrg                         if (*p == '=')
    907  1.1  mrg                         {
    908  1.1  mrg                             p++;
    909  1.1  mrg                             t.value = TOK.unsignedRightShiftAssign; // >>>=
    910  1.1  mrg                         }
    911  1.1  mrg                         else
    912  1.1  mrg                             t.value = TOK.unsignedRightShift; // >>>
    913  1.1  mrg                     }
    914  1.1  mrg                     else
    915  1.1  mrg                         t.value = TOK.rightShift; // >>
    916  1.1  mrg                 }
    917  1.1  mrg                 else
    918  1.1  mrg                     t.value = TOK.greaterThan; // >
    919  1.1  mrg                 return;
    920  1.1  mrg             case '!':
    921  1.1  mrg                 p++;
    922  1.1  mrg                 if (*p == '=')
    923  1.1  mrg                 {
    924  1.1  mrg                     p++;
    925  1.1  mrg                     t.value = TOK.notEqual; // !=
    926  1.1  mrg                 }
    927  1.1  mrg                 else
    928  1.1  mrg                     t.value = TOK.not; // !
    929  1.1  mrg                 return;
    930  1.1  mrg             case '=':
    931  1.1  mrg                 p++;
    932  1.1  mrg                 if (*p == '=')
    933  1.1  mrg                 {
    934  1.1  mrg                     p++;
    935  1.1  mrg                     t.value = TOK.equal; // ==
    936  1.1  mrg                 }
    937  1.1  mrg                 else if (*p == '>')
    938  1.1  mrg                 {
    939  1.1  mrg                     p++;
    940  1.1  mrg                     t.value = TOK.goesTo; // =>
    941  1.1  mrg                 }
    942  1.1  mrg                 else
    943  1.1  mrg                     t.value = TOK.assign; // =
    944  1.1  mrg                 return;
    945  1.1  mrg             case '~':
    946  1.1  mrg                 p++;
    947  1.1  mrg                 if (*p == '=')
    948  1.1  mrg                 {
    949  1.1  mrg                     p++;
    950  1.1  mrg                     t.value = TOK.concatenateAssign; // ~=
    951  1.1  mrg                 }
    952  1.1  mrg                 else
    953  1.1  mrg                     t.value = TOK.tilde; // ~
    954  1.1  mrg                 return;
    955  1.1  mrg             case '^':
    956  1.1  mrg                 p++;
    957  1.1  mrg                 if (*p == '^')
    958  1.1  mrg                 {
    959  1.1  mrg                     p++;
    960  1.1  mrg                     if (*p == '=')
    961  1.1  mrg                     {
    962  1.1  mrg                         p++;
    963  1.1  mrg                         t.value = TOK.powAssign; // ^^=
    964  1.1  mrg                     }
    965  1.1  mrg                     else
    966  1.1  mrg                         t.value = TOK.pow; // ^^
    967  1.1  mrg                 }
    968  1.1  mrg                 else if (*p == '=')
    969  1.1  mrg                 {
    970  1.1  mrg                     p++;
    971  1.1  mrg                     t.value = TOK.xorAssign; // ^=
    972  1.1  mrg                 }
    973  1.1  mrg                 else
    974  1.1  mrg                     t.value = TOK.xor; // ^
    975  1.1  mrg                 return;
    976  1.1  mrg             case '(':
    977  1.1  mrg                 p++;
    978  1.1  mrg                 t.value = TOK.leftParenthesis;
    979  1.1  mrg                 return;
    980  1.1  mrg             case ')':
    981  1.1  mrg                 p++;
    982  1.1  mrg                 t.value = TOK.rightParenthesis;
    983  1.1  mrg                 return;
    984  1.1  mrg             case '[':
    985  1.1  mrg                 p++;
    986  1.1  mrg                 t.value = TOK.leftBracket;
    987  1.1  mrg                 return;
    988  1.1  mrg             case ']':
    989  1.1  mrg                 p++;
    990  1.1  mrg                 t.value = TOK.rightBracket;
    991  1.1  mrg                 return;
    992  1.1  mrg             case '{':
    993  1.1  mrg                 p++;
    994  1.1  mrg                 t.value = TOK.leftCurly;
    995  1.1  mrg                 return;
    996  1.1  mrg             case '}':
    997  1.1  mrg                 p++;
    998  1.1  mrg                 t.value = TOK.rightCurly;
    999  1.1  mrg                 return;
   1000  1.1  mrg             case '?':
   1001  1.1  mrg                 p++;
   1002  1.1  mrg                 t.value = TOK.question;
   1003  1.1  mrg                 return;
   1004  1.1  mrg             case ',':
   1005  1.1  mrg                 p++;
   1006  1.1  mrg                 t.value = TOK.comma;
   1007  1.1  mrg                 return;
   1008  1.1  mrg             case ';':
   1009  1.1  mrg                 p++;
   1010  1.1  mrg                 t.value = TOK.semicolon;
   1011  1.1  mrg                 return;
   1012  1.1  mrg             case ':':
   1013  1.1  mrg                 p++;
   1014  1.1  mrg                 if (*p == ':')
   1015  1.1  mrg                 {
   1016  1.1  mrg                     ++p;
   1017  1.1  mrg                     t.value = TOK.colonColon;
   1018  1.1  mrg                 }
   1019  1.1  mrg                 else if (*p == '>' && Ccompile)
   1020  1.1  mrg                 {
   1021  1.1  mrg                     ++p;
   1022  1.1  mrg                     t.value = TOK.rightBracket;
   1023  1.1  mrg                 }
   1024  1.1  mrg                 else
   1025  1.1  mrg                     t.value = TOK.colon;
   1026  1.1  mrg                 return;
   1027  1.1  mrg             case '$':
   1028  1.1  mrg                 p++;
   1029  1.1  mrg                 t.value = TOK.dollar;
   1030  1.1  mrg                 return;
   1031  1.1  mrg             case '@':
   1032  1.1  mrg                 p++;
   1033  1.1  mrg                 t.value = TOK.at;
   1034  1.1  mrg                 return;
   1035  1.1  mrg             case '*':
   1036  1.1  mrg                 p++;
   1037  1.1  mrg                 if (*p == '=')
   1038  1.1  mrg                 {
   1039  1.1  mrg                     p++;
   1040  1.1  mrg                     t.value = TOK.mulAssign;
   1041  1.1  mrg                 }
   1042  1.1  mrg                 else
   1043  1.1  mrg                     t.value = TOK.mul;
   1044  1.1  mrg                 return;
   1045  1.1  mrg             case '%':
   1046  1.1  mrg                 p++;
   1047  1.1  mrg                 if (*p == '=')
   1048  1.1  mrg                 {
   1049  1.1  mrg                     p++;
   1050  1.1  mrg                     t.value = TOK.modAssign;
   1051  1.1  mrg                 }
   1052  1.1  mrg                 else if (*p == '>' && Ccompile)
   1053  1.1  mrg                 {
   1054  1.1  mrg                     ++p;
   1055  1.1  mrg                     t.value = TOK.rightCurly;
   1056  1.1  mrg                 }
   1057  1.1  mrg                 else if (*p == ':' && Ccompile)
   1058  1.1  mrg                 {
   1059  1.1  mrg                     goto case '#';      // %: means #
   1060  1.1  mrg                 }
   1061  1.1  mrg                 else
   1062  1.1  mrg                     t.value = TOK.mod;
   1063  1.1  mrg                 return;
   1064  1.1  mrg             case '#':
   1065  1.1  mrg                 {
   1066  1.1  mrg                     // https://issues.dlang.org/show_bug.cgi?id=22825
   1067  1.1  mrg                     // Special token sequences are terminated by newlines,
   1068  1.1  mrg                     // and should not be skipped over.
   1069  1.1  mrg                     this.tokenizeNewlines = true;
   1070  1.1  mrg                     p++;
   1071  1.1  mrg                     if (parseSpecialTokenSequence())
   1072  1.1  mrg                         continue;
   1073  1.1  mrg                     t.value = TOK.pound;
   1074  1.1  mrg                     return;
   1075  1.1  mrg                 }
   1076  1.1  mrg             default:
   1077  1.1  mrg                 {
   1078  1.1  mrg                     dchar c = *p;
   1079  1.1  mrg                     if (c & 0x80)
   1080  1.1  mrg                     {
   1081  1.1  mrg                         c = decodeUTF();
   1082  1.1  mrg                         // Check for start of unicode identifier
   1083  1.1  mrg                         if (isUniAlpha(c))
   1084  1.1  mrg                             goto case_ident;
   1085  1.1  mrg                         if (c == PS || c == LS)
   1086  1.1  mrg                         {
   1087  1.1  mrg                             endOfLine();
   1088  1.1  mrg                             p++;
   1089  1.1  mrg                             if (tokenizeNewlines)
   1090  1.1  mrg                             {
   1091  1.1  mrg                                 t.value = TOK.endOfLine;
   1092  1.1  mrg                                 tokenizeNewlines = false;
   1093  1.1  mrg                                 return;
   1094  1.1  mrg                             }
   1095  1.1  mrg                             continue;
   1096  1.1  mrg                         }
   1097  1.1  mrg                     }
   1098  1.1  mrg                     if (c < 0x80 && isprint(c))
   1099  1.1  mrg                         error("character '%c' is not a valid token", c);
   1100  1.1  mrg                     else
   1101  1.1  mrg                         error("character 0x%02x is not a valid token", c);
   1102  1.1  mrg                     p++;
   1103  1.1  mrg                     continue;
   1104  1.1  mrg                 }
   1105  1.1  mrg             }
   1106  1.1  mrg         }
   1107  1.1  mrg     }
   1108  1.1  mrg 
   1109  1.1  mrg     final Token* peek(Token* ct)
   1110  1.1  mrg     {
   1111  1.1  mrg         Token* t;
   1112  1.1  mrg         if (ct.next)
   1113  1.1  mrg             t = ct.next;
   1114  1.1  mrg         else
   1115  1.1  mrg         {
   1116  1.1  mrg             t = allocateToken();
   1117  1.1  mrg             scan(t);
   1118  1.1  mrg             ct.next = t;
   1119  1.1  mrg         }
   1120  1.1  mrg         return t;
   1121  1.1  mrg     }
   1122  1.1  mrg 
   1123  1.1  mrg     /*********************************
   1124  1.1  mrg      * tk is on the opening (.
   1125  1.1  mrg      * Look ahead and return token that is past the closing ).
   1126  1.1  mrg      */
   1127  1.1  mrg     final Token* peekPastParen(Token* tk)
   1128  1.1  mrg     {
   1129  1.1  mrg         //printf("peekPastParen()\n");
   1130  1.1  mrg         int parens = 1;
   1131  1.1  mrg         int curlynest = 0;
   1132  1.1  mrg         while (1)
   1133  1.1  mrg         {
   1134  1.1  mrg             tk = peek(tk);
   1135  1.1  mrg             //tk.print();
   1136  1.1  mrg             switch (tk.value)
   1137  1.1  mrg             {
   1138  1.1  mrg             case TOK.leftParenthesis:
   1139  1.1  mrg                 parens++;
   1140  1.1  mrg                 continue;
   1141  1.1  mrg             case TOK.rightParenthesis:
   1142  1.1  mrg                 --parens;
   1143  1.1  mrg                 if (parens)
   1144  1.1  mrg                     continue;
   1145  1.1  mrg                 tk = peek(tk);
   1146  1.1  mrg                 break;
   1147  1.1  mrg             case TOK.leftCurly:
   1148  1.1  mrg                 curlynest++;
   1149  1.1  mrg                 continue;
   1150  1.1  mrg             case TOK.rightCurly:
   1151  1.1  mrg                 if (--curlynest >= 0)
   1152  1.1  mrg                     continue;
   1153  1.1  mrg                 break;
   1154  1.1  mrg             case TOK.semicolon:
   1155  1.1  mrg                 if (curlynest)
   1156  1.1  mrg                     continue;
   1157  1.1  mrg                 break;
   1158  1.1  mrg             case TOK.endOfFile:
   1159  1.1  mrg                 break;
   1160  1.1  mrg             default:
   1161  1.1  mrg                 continue;
   1162  1.1  mrg             }
   1163  1.1  mrg             return tk;
   1164  1.1  mrg         }
   1165  1.1  mrg     }
   1166  1.1  mrg 
   1167  1.1  mrg     /*******************************************
   1168  1.1  mrg      * Parse escape sequence.
   1169  1.1  mrg      */
   1170  1.1  mrg     private uint escapeSequence()
   1171  1.1  mrg     {
   1172  1.1  mrg         return Lexer.escapeSequence(token.loc, p, Ccompile);
   1173  1.1  mrg     }
   1174  1.1  mrg 
   1175  1.1  mrg     /********
   1176  1.1  mrg      * Parse the given string literal escape sequence into a single character.
   1177  1.1  mrg      * D https://dlang.org/spec/lex.html#escape_sequences
   1178  1.1  mrg      * C11 6.4.4.4
   1179  1.1  mrg      * Params:
   1180  1.1  mrg      *  loc = location to use for error messages
   1181  1.1  mrg      *  sequence = pointer to string with escape sequence to parse. Updated to
   1182  1.1  mrg      *             point past the end of the escape sequence
   1183  1.1  mrg      *  Ccompile = true for compile C11 escape sequences
   1184  1.1  mrg      * Returns:
   1185  1.1  mrg      *  the escape sequence as a single character
   1186  1.1  mrg      */
   1187  1.1  mrg     private static dchar escapeSequence(const ref Loc loc, ref const(char)* sequence, bool Ccompile)
   1188  1.1  mrg     {
   1189  1.1  mrg         const(char)* p = sequence; // cache sequence reference on stack
   1190  1.1  mrg         scope(exit) sequence = p;
   1191  1.1  mrg 
   1192  1.1  mrg         uint c = *p;
   1193  1.1  mrg         int ndigits;
   1194  1.1  mrg         switch (c)
   1195  1.1  mrg         {
   1196  1.1  mrg         case '\'':
   1197  1.1  mrg         case '"':
   1198  1.1  mrg         case '?':
   1199  1.1  mrg         case '\\':
   1200  1.1  mrg         Lconsume:
   1201  1.1  mrg             p++;
   1202  1.1  mrg             break;
   1203  1.1  mrg         case 'a':
   1204  1.1  mrg             c = 7;
   1205  1.1  mrg             goto Lconsume;
   1206  1.1  mrg         case 'b':
   1207  1.1  mrg             c = 8;
   1208  1.1  mrg             goto Lconsume;
   1209  1.1  mrg         case 'f':
   1210  1.1  mrg             c = 12;
   1211  1.1  mrg             goto Lconsume;
   1212  1.1  mrg         case 'n':
   1213  1.1  mrg             c = 10;
   1214  1.1  mrg             goto Lconsume;
   1215  1.1  mrg         case 'r':
   1216  1.1  mrg             c = 13;
   1217  1.1  mrg             goto Lconsume;
   1218  1.1  mrg         case 't':
   1219  1.1  mrg             c = 9;
   1220  1.1  mrg             goto Lconsume;
   1221  1.1  mrg         case 'v':
   1222  1.1  mrg             c = 11;
   1223  1.1  mrg             goto Lconsume;
   1224  1.1  mrg         case 'u':
   1225  1.1  mrg             ndigits = 4;
   1226  1.1  mrg             goto Lhex;
   1227  1.1  mrg         case 'U':
   1228  1.1  mrg             ndigits = 8;
   1229  1.1  mrg             goto Lhex;
   1230  1.1  mrg         case 'x':
   1231  1.1  mrg             ndigits = 2;
   1232  1.1  mrg         Lhex:
   1233  1.1  mrg             p++;
   1234  1.1  mrg             c = *p;
   1235  1.1  mrg             if (ishex(cast(char)c))
   1236  1.1  mrg             {
   1237  1.1  mrg                 uint v = 0;
   1238  1.1  mrg                 int n = 0;
   1239  1.1  mrg                 if (Ccompile && ndigits == 2)
   1240  1.1  mrg                 {
   1241  1.1  mrg                     /* C11 6.4.4.4-7 one to infinity hex digits
   1242  1.1  mrg                      */
   1243  1.1  mrg                     do
   1244  1.1  mrg                     {
   1245  1.1  mrg                         if (isdigit(cast(char)c))
   1246  1.1  mrg                             c -= '0';
   1247  1.1  mrg                         else if (islower(c))
   1248  1.1  mrg                             c -= 'a' - 10;
   1249  1.1  mrg                         else
   1250  1.1  mrg                             c -= 'A' - 10;
   1251  1.1  mrg                         v = v * 16 + c;
   1252  1.1  mrg                         c = *++p;
   1253  1.1  mrg                     } while (ishex(cast(char)c));
   1254  1.1  mrg                 }
   1255  1.1  mrg                 else
   1256  1.1  mrg                 {
   1257  1.1  mrg                     while (1)
   1258  1.1  mrg                     {
   1259  1.1  mrg                         if (isdigit(cast(char)c))
   1260  1.1  mrg                             c -= '0';
   1261  1.1  mrg                         else if (islower(c))
   1262  1.1  mrg                             c -= 'a' - 10;
   1263  1.1  mrg                         else
   1264  1.1  mrg                             c -= 'A' - 10;
   1265  1.1  mrg                         v = v * 16 + c;
   1266  1.1  mrg                         c = *++p;
   1267  1.1  mrg                         if (++n == ndigits)
   1268  1.1  mrg                             break;
   1269  1.1  mrg                         if (!ishex(cast(char)c))
   1270  1.1  mrg                         {
   1271  1.1  mrg                             .error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits);
   1272  1.1  mrg                             break;
   1273  1.1  mrg                         }
   1274  1.1  mrg                     }
   1275  1.1  mrg                     if (ndigits != 2 && !utf_isValidDchar(v))
   1276  1.1  mrg                     {
   1277  1.1  mrg                         .error(loc, "invalid UTF character \\U%08x", v);
   1278  1.1  mrg                         v = '?'; // recover with valid UTF character
   1279  1.1  mrg                     }
   1280  1.1  mrg                 }
   1281  1.1  mrg                 c = v;
   1282  1.1  mrg             }
   1283  1.1  mrg             else
   1284  1.1  mrg             {
   1285  1.1  mrg                 .error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c);
   1286  1.1  mrg                 p++;
   1287  1.1  mrg             }
   1288  1.1  mrg             break;
   1289  1.1  mrg         case '&':
   1290  1.1  mrg             if (Ccompile)
   1291  1.1  mrg                 goto default;
   1292  1.1  mrg 
   1293  1.1  mrg             // named character entity
   1294  1.1  mrg             for (const idstart = ++p; 1; p++)
   1295  1.1  mrg             {
   1296  1.1  mrg                 switch (*p)
   1297  1.1  mrg                 {
   1298  1.1  mrg                 case ';':
   1299  1.1  mrg                     c = HtmlNamedEntity(idstart, p - idstart);
   1300  1.1  mrg                     if (c == ~0)
   1301  1.1  mrg                     {
   1302  1.1  mrg                         .error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart);
   1303  1.1  mrg                         c = '?';
   1304  1.1  mrg                     }
   1305  1.1  mrg                     p++;
   1306  1.1  mrg                     break;
   1307  1.1  mrg                 default:
   1308  1.1  mrg                     if (isalpha(*p) || (p != idstart && isdigit(*p)))
   1309  1.1  mrg                         continue;
   1310  1.1  mrg                     .error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart);
   1311  1.1  mrg                     c = '?';
   1312  1.1  mrg                     break;
   1313  1.1  mrg                 }
   1314  1.1  mrg                 break;
   1315  1.1  mrg             }
   1316  1.1  mrg             break;
   1317  1.1  mrg         case 0:
   1318  1.1  mrg         case 0x1A:
   1319  1.1  mrg             // end of file
   1320  1.1  mrg             c = '\\';
   1321  1.1  mrg             break;
   1322  1.1  mrg         default:
   1323  1.1  mrg             if (isoctal(cast(char)c))
   1324  1.1  mrg             {
   1325  1.1  mrg                 uint v = 0;
   1326  1.1  mrg                 int n = 0;
   1327  1.1  mrg                 do
   1328  1.1  mrg                 {
   1329  1.1  mrg                     v = v * 8 + (c - '0');
   1330  1.1  mrg                     c = *++p;
   1331  1.1  mrg                 }
   1332  1.1  mrg                 while (++n < 3 && isoctal(cast(char)c));
   1333  1.1  mrg                 c = v;
   1334  1.1  mrg                 if (c > 0xFF)
   1335  1.1  mrg                     .error(loc, "escape octal sequence \\%03o is larger than \\377", c);
   1336  1.1  mrg             }
   1337  1.1  mrg             else
   1338  1.1  mrg             {
   1339  1.1  mrg                 .error(loc, "undefined escape sequence \\%c", c);
   1340  1.1  mrg                 p++;
   1341  1.1  mrg             }
   1342  1.1  mrg             break;
   1343  1.1  mrg         }
   1344  1.1  mrg         return c;
   1345  1.1  mrg     }
   1346  1.1  mrg 
   1347  1.1  mrg     /**
   1348  1.1  mrg     Lex a wysiwyg string. `p` must be pointing to the first character before the
   1349  1.1  mrg     contents of the string literal. The character pointed to by `p` will be used as
   1350  1.1  mrg     the terminating character (i.e. backtick or double-quote).
                      No escape sequences are interpreted. ASCII bytes are copied unchanged, except
                      that `\r\n` and a lone `\r` are each stored as a single `\n`; non-ASCII
                      characters are decoded and written back with `writeUTF8`.
                      If NUL or 0x1A is met before the terminator, an error is reported and `p` is
                      left pointing at that character.
   1351  1.1  mrg     Params:
   1352  1.1  mrg         result = pointer to the token that accepts the result
   1353  1.1  mrg     */
   1354  1.1  mrg     private void wysiwygStringConstant(Token* result)
   1355  1.1  mrg     {
   1356  1.1  mrg         result.value = TOK.string_;
                          // Captured before `p` moves; used only in the "unterminated" diagnostic below.
   1357  1.1  mrg         Loc start = loc();
                          // Whatever character opened the literal is also the one that closes it.
   1358  1.1  mrg         auto terminator = p[0];
   1359  1.1  mrg         p++;
   1360  1.1  mrg         stringbuffer.setsize(0);
   1361  1.1  mrg         while (1)
   1362  1.1  mrg         {
                              // Fetch one byte and step past it; the cases below may adjust `p` again.
   1363  1.1  mrg             dchar c = p[0];
   1364  1.1  mrg             p++;
   1365  1.1  mrg             switch (c)
   1366  1.1  mrg             {
   1367  1.1  mrg             case '\n':
   1368  1.1  mrg                 endOfLine();
   1369  1.1  mrg                 break;
   1370  1.1  mrg             case '\r':
                                  // For "\r\n" drop the '\r'; the '\n' is handled by the next iteration.
   1371  1.1  mrg                 if (p[0] == '\n')
   1372  1.1  mrg                     continue; // ignore
   1373  1.1  mrg                 c = '\n'; // treat EndOfLine as \n character
   1374  1.1  mrg                 endOfLine();
   1375  1.1  mrg                 break;
   1376  1.1  mrg             case 0:
   1377  1.1  mrg             case 0x1A:
   1378  1.1  mrg                 error("unterminated string constant starting at %s", start.toChars());
   1379  1.1  mrg                 result.setString();
   1380  1.1  mrg                 // rewind `p` so it points to the EOF character
   1381  1.1  mrg                 p--;
   1382  1.1  mrg                 return;
   1383  1.1  mrg             default:
   1384  1.1  mrg                 if (c == terminator)
   1385  1.1  mrg                 {
                                      // Closing delimiter: hand over the collected text, then read an
                                      // optional postfix character.
   1386  1.1  mrg                     result.setString(stringbuffer);
   1387  1.1  mrg                     stringPostfix(result);
   1388  1.1  mrg                     return;
   1389  1.1  mrg                 }
   1390  1.1  mrg                 else if (c & 0x80)
   1391  1.1  mrg                 {
                                      // High bit set: back up onto this byte and decode the whole
                                      // sequence. NOTE(review): the p++ afterwards implies decodeUTF()
                                      // leaves `p` on the last byte it consumed - confirm.
   1392  1.1  mrg                     p--;
   1393  1.1  mrg                     const u = decodeUTF();
   1394  1.1  mrg                     p++;
                                      // PS/LS (presumably the Unicode paragraph/line separators) are
                                      // counted as line ends but still stored as themselves.
   1395  1.1  mrg                     if (u == PS || u == LS)
   1396  1.1  mrg                         endOfLine();
   1397  1.1  mrg                     stringbuffer.writeUTF8(u);
   1398  1.1  mrg                     continue;
   1399  1.1  mrg                 }
   1400  1.1  mrg                 break;
   1401  1.1  mrg             }
                              // Reached via `break`: '\n', a lone '\r' (already turned into '\n') or
                              // any other byte without the high bit.
   1402  1.1  mrg             stringbuffer.writeByte(c);
   1403  1.1  mrg         }
   1404  1.1  mrg     }
   1405  1.1  mrg 
   1406  1.1  mrg     /**
   1407  1.1  mrg     Lex a delimited string. Some examples of delimited strings are:
   1408  1.1  mrg     ---
   1409  1.1  mrg     q"(foo(xxx))"      // "foo(xxx)"
   1410  1.1  mrg     q"[foo$(LPAREN)]"  // "foo$(LPAREN)"
   1411  1.1  mrg     q"/foo]/"          // "foo]"
   1412  1.1  mrg     q"HERE
   1413  1.1  mrg     foo
   1414  1.1  mrg     HERE"              // "foo\n"
   1415  1.1  mrg     ---
   1416  1.1  mrg     It is assumed that `p` points to the opening double-quote '"'.
                      The first character after that quote selects the delimiter form: one of the
                      nesting pairs `( )`, `{ }`, `[ ]`, `< >`; the start of a heredoc identifier;
                      or any other character, which then closes the string itself.
                      The closing delimiter must be followed by a double-quote, otherwise an error
                      is reported. If NUL or 0x1A is met first, an error is reported and `p` is left
                      pointing at that character.
   1417  1.1  mrg     Params:
   1418  1.1  mrg         result = pointer to the token that accepts the result
   1419  1.1  mrg     */
   1420  1.1  mrg     private void delimitedStringConstant(Token* result)
   1421  1.1  mrg     {
   1422  1.1  mrg         result.value = TOK.string_;
   1423  1.1  mrg         Loc start = loc();
                          // delimleft stays 0 until the first character after the opening quote is read;
                          // delimright is the character that closes the string (not set for heredocs).
   1424  1.1  mrg         dchar delimleft = 0;
   1425  1.1  mrg         dchar delimright = 0;
                          // nest is 1 for the bracket pairs, whose depth is tracked in nestcount;
                          // it is set to 0 for every other kind of delimiter.
   1426  1.1  mrg         uint nest = 1;
   1427  1.1  mrg         uint nestcount = ~0; // dead assignment, needed to suppress warning
                          // Heredoc state: the delimiter identifier, a flag that the remainder of the
                          // identifier's line must be blank, and a flag that a new line has just begun.
   1428  1.1  mrg         Identifier hereid = null;
   1429  1.1  mrg         uint blankrol = 0;
   1430  1.1  mrg         uint startline = 0;
   1431  1.1  mrg         p++;
   1432  1.1  mrg         stringbuffer.setsize(0);
   1433  1.1  mrg         while (1)
   1434  1.1  mrg         {
   1435  1.1  mrg             dchar c = *p++;
   1436  1.1  mrg             //printf("c = '%c'\n", c);
   1437  1.1  mrg             switch (c)
   1438  1.1  mrg             {
   1439  1.1  mrg             case '\n':
   1440  1.1  mrg             Lnextline:
   1441  1.1  mrg                 endOfLine();
   1442  1.1  mrg                 startline = 1;
                                  // The line break that ends the heredoc identifier's line is not stored.
   1443  1.1  mrg                 if (blankrol)
   1444  1.1  mrg                 {
   1445  1.1  mrg                     blankrol = 0;
   1446  1.1  mrg                     continue;
   1447  1.1  mrg                 }
                                  // Inside a heredoc the line break is stored here and the delimiter
                                  // checks below are skipped, which leaves startline set for the next
                                  // character.
   1448  1.1  mrg                 if (hereid)
   1449  1.1  mrg                 {
   1450  1.1  mrg                     stringbuffer.writeUTF8(c);
   1451  1.1  mrg                     continue;
   1452  1.1  mrg                 }
   1453  1.1  mrg                 break;
   1454  1.1  mrg             case '\r':
                                  // For "\r\n" drop the '\r'; the '\n' is handled by the next iteration.
   1455  1.1  mrg                 if (*p == '\n')
   1456  1.1  mrg                     continue; // ignore
   1457  1.1  mrg                 c = '\n'; // treat EndOfLine as \n character
   1458  1.1  mrg                 goto Lnextline;
   1459  1.1  mrg             case 0:
   1460  1.1  mrg             case 0x1A:
   1461  1.1  mrg                 error("unterminated delimited string constant starting at %s", start.toChars());
   1462  1.1  mrg                 result.setString();
   1463  1.1  mrg                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
   1464  1.1  mrg                 p--;
   1465  1.1  mrg                 return;
   1466  1.1  mrg             default:
   1467  1.1  mrg                 if (c & 0x80)
   1468  1.1  mrg                 {
                                      // High bit set: back up onto this byte and decode the whole sequence.
                                      // Unlike '\r', a PS/LS reaches Lnextline with its own value in `c`.
   1469  1.1  mrg                     p--;
   1470  1.1  mrg                     c = decodeUTF();
   1471  1.1  mrg                     p++;
   1472  1.1  mrg                     if (c == PS || c == LS)
   1473  1.1  mrg                         goto Lnextline;
   1474  1.1  mrg                 }
   1475  1.1  mrg                 break;
   1476  1.1  mrg             }
                              // First character after the opening quote: decide the delimiter form.
   1477  1.1  mrg             if (delimleft == 0)
   1478  1.1  mrg             {
   1479  1.1  mrg                 delimleft = c;
   1480  1.1  mrg                 nest = 1;
   1481  1.1  mrg                 nestcount = 1;
   1482  1.1  mrg                 if (c == '(')
   1483  1.1  mrg                     delimright = ')';
   1484  1.1  mrg                 else if (c == '{')
   1485  1.1  mrg                     delimright = '}';
   1486  1.1  mrg                 else if (c == '[')
   1487  1.1  mrg                     delimright = ']';
   1488  1.1  mrg                 else if (c == '<')
   1489  1.1  mrg                     delimright = '>';
   1490  1.1  mrg                 else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c)))
   1491  1.1  mrg                 {
   1492  1.1  mrg                     // Start of identifier; must be a heredoc
                                      // Step back onto the character just read so scan() lexes the
                                      // identifier from its start.
   1493  1.1  mrg                     Token tok;
   1494  1.1  mrg                     p--;
   1495  1.1  mrg                     scan(&tok); // read in heredoc identifier
   1496  1.1  mrg                     if (tok.value != TOK.identifier)
   1497  1.1  mrg                     {
                                          // scan() produced something other than an identifier token:
                                          // report it and fall back to `c` as a plain closing delimiter.
   1498  1.1  mrg                         error("identifier expected for heredoc, not %s", tok.toChars());
   1499  1.1  mrg                         delimright = c;
   1500  1.1  mrg                     }
   1501  1.1  mrg                     else
   1502  1.1  mrg                     {
   1503  1.1  mrg                         hereid = tok.ident;
   1504  1.1  mrg                         //printf("hereid = '%s'\n", hereid.toChars());
   1505  1.1  mrg                         blankrol = 1;
   1506  1.1  mrg                     }
   1507  1.1  mrg                     nest = 0;
   1508  1.1  mrg                 }
   1509  1.1  mrg                 else
   1510  1.1  mrg                 {
                                      // Any other character is both the opening and the closing delimiter.
   1511  1.1  mrg                     delimright = c;
   1512  1.1  mrg                     nest = 0;
   1513  1.1  mrg                     if (isspace(c))
   1514  1.1  mrg                         error("delimiter cannot be whitespace");
   1515  1.1  mrg                 }
   1516  1.1  mrg             }
   1517  1.1  mrg             else
   1518  1.1  mrg             {
                                  // A character other than a line break on the heredoc identifier's line:
                                  // report it once and drop it.
   1519  1.1  mrg                 if (blankrol)
   1520  1.1  mrg                 {
   1521  1.1  mrg                     error("heredoc rest of line should be blank");
   1522  1.1  mrg                     blankrol = 0;
   1523  1.1  mrg                     continue;
   1524  1.1  mrg                 }
   1525  1.1  mrg                 if (nest == 1)
   1526  1.1  mrg                 {
                                      // Bracket form: the string ends when the depth returns to zero;
                                      // inner brackets are kept as content.
   1527  1.1  mrg                     if (c == delimleft)
   1528  1.1  mrg                         nestcount++;
   1529  1.1  mrg                     else if (c == delimright)
   1530  1.1  mrg                     {
   1531  1.1  mrg                         nestcount--;
   1532  1.1  mrg                         if (nestcount == 0)
   1533  1.1  mrg                             goto Ldone;
   1534  1.1  mrg                     }
   1535  1.1  mrg                 }
   1536  1.1  mrg                 else if (c == delimright)
   1537  1.1  mrg                     goto Ldone;
                                  // At the start of a heredoc line, try to lex an identifier; if it is not
                                  // hereid, restore `p` and treat `c` as ordinary content.
   1538  1.1  mrg                 if (startline && (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) && hereid)
   1539  1.1  mrg                 {
   1540  1.1  mrg                     Token tok;
   1541  1.1  mrg                     auto psave = p;
   1542  1.1  mrg                     p--;
   1543  1.1  mrg                     scan(&tok); // read in possible heredoc identifier
   1544  1.1  mrg                     //printf("endid = '%s'\n", tok.ident.toChars());
   1545  1.1  mrg                     if (tok.value == TOK.identifier && tok.ident is hereid)
   1546  1.1  mrg                     {
   1547  1.1  mrg                         /* should check that rest of line is blank
   1548  1.1  mrg                          */
   1549  1.1  mrg                         goto Ldone;
   1550  1.1  mrg                     }
   1551  1.1  mrg                     p = psave;
   1552  1.1  mrg                 }
   1553  1.1  mrg                 stringbuffer.writeUTF8(c);
   1554  1.1  mrg                 startline = 0;
   1555  1.1  mrg             }
   1556  1.1  mrg         }
   1557  1.1  mrg     Ldone:
                          // The closing delimiter has been consumed; a double-quote must come next.
                          // The message names whichever delimiter form was in use.
   1558  1.1  mrg         if (*p == '"')
   1559  1.1  mrg             p++;
   1560  1.1  mrg         else if (hereid)
   1561  1.1  mrg             error("delimited string must end in `%s\"`", hereid.toChars());
   1562  1.1  mrg         else if (isspace(delimright))
   1563  1.1  mrg             error("delimited string must end in `\"`");
   1564  1.1  mrg         else
   1565  1.1  mrg             error("delimited string must end in `%c\"`", delimright);
   1566  1.1  mrg         result.setString(stringbuffer);
   1567  1.1  mrg         stringPostfix(result);
   1568  1.1  mrg     }
   1569  1.1  mrg 
   1570  1.1  mrg     /**
   1571  1.1  mrg     Lex a token string. Some examples of token strings are:
   1572  1.1  mrg     ---
   1573  1.1  mrg     q{ foo(xxx) }    // " foo(xxx) "
   1574  1.1  mrg     q{foo$(LPAREN)}  // "foo$(LPAREN)"
   1575  1.1  mrg     q{{foo}"}"}      // "{foo}"}""
   1576  1.1  mrg     ---
   1577  1.1  mrg     It is assumed that `p` points to the opening curly-brace.
   1578  1.1  mrg     Params:
   1579  1.1  mrg         result = pointer to the token that accepts the result
   1580  1.1  mrg     */
   1581  1.1  mrg     private void tokenStringConstant(Token* result)
   1582  1.1  mrg     {
   1583  1.1  mrg         result.value = TOK.string_;
   1584  1.1  mrg 
   1585  1.1  mrg         uint nest = 1;
   1586  1.1  mrg         const start = loc();
   1587  1.1  mrg         const pstart = ++p;
   1588  1.1  mrg         inTokenStringConstant++;
   1589  1.1  mrg         scope(exit) inTokenStringConstant--;
   1590  1.1  mrg         while (1)
   1591  1.1  mrg         {
   1592  1.1  mrg             Token tok;
   1593  1.1  mrg             scan(&tok);
   1594  1.1  mrg             switch (tok.value)
   1595  1.1  mrg             {
   1596  1.1  mrg             case TOK.leftCurly:
   1597  1.1  mrg                 nest++;
   1598  1.1  mrg                 continue;
   1599  1.1  mrg             case TOK.rightCurly:
   1600  1.1  mrg                 if (--nest == 0)
   1601  1.1  mrg                 {
   1602  1.1  mrg                     result.setString(pstart, p - 1 - pstart);
   1603  1.1  mrg                     stringPostfix(result);
   1604  1.1  mrg                     return;
   1605  1.1  mrg                 }
   1606  1.1  mrg                 continue;
   1607  1.1  mrg             case TOK.endOfFile:
   1608  1.1  mrg                 error("unterminated token string constant starting at %s", start.toChars());
   1609  1.1  mrg                 result.setString();
   1610  1.1  mrg                 return;
   1611  1.1  mrg             default:
   1612  1.1  mrg                 continue;
   1613  1.1  mrg             }
   1614  1.1  mrg         }
   1615  1.1  mrg     }
   1616  1.1  mrg 
   1617  1.1  mrg     /**
   1618  1.1  mrg     Scan a quoted string while building the processed string value by
   1619  1.1  mrg     handling escape sequences. The result is returned in the given `t` token.
   1620  1.1  mrg     This function assumes that `p` currently points to the opening quote
   1621  1.1  mrg     of the string.
                      The opening character is also the closing one; when a `'` or `"` differs
                      from it, it is treated as ordinary content.
                      When `Ccompile` is set, a line break inside the literal is reported as an
                      unterminated string and no string postfix is read.
   1622  1.1  mrg     Params:
   1623  1.1  mrg         t = the token to set the resulting string to
   1624  1.1  mrg     * References:
   1625  1.1  mrg     *   D https://dlang.org/spec/lex.html#double_quoted_strings
   1626  1.1  mrg     *   ImportC C11 6.4.5
   1627  1.1  mrg     */
   1628  1.1  mrg     private void escapeStringConstant(Token* t)
   1629  1.1  mrg     {
   1630  1.1  mrg         t.value = TOK.string_;
   1631  1.1  mrg 
                          // Captured before `p` moves; used only in the "unterminated" diagnostic.
   1632  1.1  mrg         const start = loc();
   1633  1.1  mrg         const tc = *p++;        // opening quote
   1634  1.1  mrg         stringbuffer.setsize(0);
   1635  1.1  mrg         while (1)
   1636  1.1  mrg         {
   1637  1.1  mrg             dchar c = *p++;
   1638  1.1  mrg             switch (c)
   1639  1.1  mrg             {
   1640  1.1  mrg             case '\\':
                                  // `p` is on the character after the backslash. \u and \U (and \& unless
                                  // Ccompile) are written as UTF-8; every other escape is written as a
                                  // single byte by the writeByte() at the bottom of the loop.
   1641  1.1  mrg                 switch (*p)
   1642  1.1  mrg                 {
   1643  1.1  mrg                 case '&':
   1644  1.1  mrg                     if (Ccompile)
   1645  1.1  mrg                         goto default;
   1646  1.1  mrg                     goto case;
   1647  1.1  mrg 
   1648  1.1  mrg                 case 'u':
   1649  1.1  mrg                 case 'U':
   1650  1.1  mrg                     c = escapeSequence();
   1651  1.1  mrg                     stringbuffer.writeUTF8(c);
   1652  1.1  mrg                     continue;
   1653  1.1  mrg                 default:
   1654  1.1  mrg                     c = escapeSequence();
   1655  1.1  mrg                     break;
   1656  1.1  mrg                 }
   1657  1.1  mrg                 break;
   1658  1.1  mrg             case '\n':
   1659  1.1  mrg                 endOfLine();
   1660  1.1  mrg                 if (Ccompile)
   1661  1.1  mrg                     goto Lunterminated;
   1662  1.1  mrg                 break;
   1663  1.1  mrg             case '\r':
                                  // For "\r\n" drop the '\r'; the '\n' is handled by the next iteration.
   1664  1.1  mrg                 if (*p == '\n')
   1665  1.1  mrg                     continue; // ignore
   1666  1.1  mrg                 c = '\n'; // treat EndOfLine as \n character
   1667  1.1  mrg                 endOfLine();
   1668  1.1  mrg                 if (Ccompile)
   1669  1.1  mrg                     goto Lunterminated;
   1670  1.1  mrg                 break;
   1671  1.1  mrg             case '\'':
   1672  1.1  mrg             case '"':
                                  // Only the quote character that opened the literal closes it.
   1673  1.1  mrg                 if (c != tc)
   1674  1.1  mrg                     goto default;
   1675  1.1  mrg                 t.setString(stringbuffer);
   1676  1.1  mrg                 if (!Ccompile)
   1677  1.1  mrg                     stringPostfix(t);
   1678  1.1  mrg                 return;
   1679  1.1  mrg             case 0:
   1680  1.1  mrg             case 0x1A:
   1681  1.1  mrg                 // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
   1682  1.1  mrg                 p--;
                              // Also the target of the Ccompile line-break paths, which arrive here
                              // without the decrement above.
   1683  1.1  mrg             Lunterminated:
   1684  1.1  mrg                 error("unterminated string constant starting at %s", start.toChars());
   1685  1.1  mrg                 t.setString();
   1686  1.1  mrg                 return;
   1687  1.1  mrg             default:
   1688  1.1  mrg                 if (c & 0x80)
   1689  1.1  mrg                 {
                                      // High bit set: back up onto this byte and decode the whole sequence.
   1690  1.1  mrg                     p--;
   1691  1.1  mrg                     c = decodeUTF();
                                      // LS/PS are stored as '\n' and counted as a line end.
                                      // NOTE(review): the Ccompile jump below happens before the p++, so
                                      // `p` is not stepped past the sequence on that path - confirm this
                                      // is intended.
   1692  1.1  mrg                     if (c == LS || c == PS)
   1693  1.1  mrg                     {
   1694  1.1  mrg                         c = '\n';
   1695  1.1  mrg                         endOfLine();
   1696  1.1  mrg                         if (Ccompile)
   1697  1.1  mrg                             goto Lunterminated;
   1698  1.1  mrg                     }
   1699  1.1  mrg                     p++;
   1700  1.1  mrg                     stringbuffer.writeUTF8(c);
   1701  1.1  mrg                     continue;
   1702  1.1  mrg                 }
   1703  1.1  mrg                 break;
   1704  1.1  mrg             }
                              // Reached via `break`: single-byte escapes, line breaks (when not
                              // Ccompile), the non-closing quote character and other bytes without
                              // the high bit.
   1705  1.1  mrg             stringbuffer.writeByte(c);
   1706  1.1  mrg         }
   1707  1.1  mrg     }
   1708  1.1  mrg 
   1709  1.1  mrg     /**************************************
                       * Lex a D character literal. The character at `p` on entry is skipped
                       * unconditionally (presumably the opening quote).
                       * The character's value is stored in `t.unsvalue`; when the literal is empty,
                       * unterminated or holds more than one character, an error is reported and
                       * `'?'` is stored instead.
   1710  1.1  mrg      * Reference:
   1711  1.1  mrg      *    https://dlang.org/spec/lex.html#characterliteral
                       * Params:
                       *  t = token whose `unsvalue` receives the character
                       * Returns:
                       *  `TOK.charLiteral`, `TOK.wcharLiteral` or `TOK.dcharLiteral`
   1712  1.1  mrg      */
   1713  1.1  mrg     private TOK charConstant(Token* t)
   1714  1.1  mrg     {
   1715  1.1  mrg         TOK tk = TOK.charLiteral;
   1716  1.1  mrg         //printf("Lexer::charConstant\n");
   1717  1.1  mrg         p++;
   1718  1.1  mrg         dchar c = *p++;
   1719  1.1  mrg         switch (c)
   1720  1.1  mrg         {
   1721  1.1  mrg         case '\\':
                              // The kind of escape picks the literal type: \u gives wchar, \U and \&
                              // give dchar, anything else stays char. escapeSequence() yields the value.
   1722  1.1  mrg             switch (*p)
   1723  1.1  mrg             {
   1724  1.1  mrg             case 'u':
   1725  1.1  mrg                 t.unsvalue = escapeSequence();
   1726  1.1  mrg                 tk = TOK.wcharLiteral;
   1727  1.1  mrg                 break;
   1728  1.1  mrg             case 'U':
   1729  1.1  mrg             case '&':
   1730  1.1  mrg                 t.unsvalue = escapeSequence();
   1731  1.1  mrg                 tk = TOK.dcharLiteral;
   1732  1.1  mrg                 break;
   1733  1.1  mrg             default:
   1734  1.1  mrg                 t.unsvalue = escapeSequence();
   1735  1.1  mrg                 break;
   1736  1.1  mrg             }
   1737  1.1  mrg             break;
                          // Every case from here down to `case '\''` ends at the same "unterminated"
                          // error. '\n' (and LS/PS, via L1) call endOfLine() first; end of file backs
                          // `p` up first.
   1738  1.1  mrg         case '\n':
   1739  1.1  mrg         L1:
   1740  1.1  mrg             endOfLine();
   1741  1.1  mrg             goto case;
   1742  1.1  mrg         case '\r':
   1743  1.1  mrg             goto case '\'';
   1744  1.1  mrg         case 0:
   1745  1.1  mrg         case 0x1A:
   1746  1.1  mrg             // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
   1747  1.1  mrg             p--;
   1748  1.1  mrg             goto case;
   1749  1.1  mrg         case '\'':
                              // A quote right after the opening character (an empty literal) gets the
                              // same message.
   1750  1.1  mrg             error("unterminated character constant");
   1751  1.1  mrg             t.unsvalue = '?';
   1752  1.1  mrg             return tk;
   1753  1.1  mrg         default:
   1754  1.1  mrg             if (c & 0x80)
   1755  1.1  mrg             {
                                  // High bit set: back up onto this byte and decode the whole sequence.
   1756  1.1  mrg                 p--;
   1757  1.1  mrg                 c = decodeUTF();
   1758  1.1  mrg                 p++;
   1759  1.1  mrg                 if (c == LS || c == PS)
   1760  1.1  mrg                     goto L1;
                                  // Code points below 0xD800 or in 0xE000 .. 0xFFFD are typed wchar;
                                  // all others are typed dchar.
   1761  1.1  mrg                 if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
   1762  1.1  mrg                     tk = TOK.wcharLiteral;
   1763  1.1  mrg                 else
   1764  1.1  mrg                     tk = TOK.dcharLiteral;
   1765  1.1  mrg             }
   1766  1.1  mrg             t.unsvalue = c;
   1767  1.1  mrg             break;
   1768  1.1  mrg         }
                          // One character has been read; the closing quote must come next.
   1769  1.1  mrg         if (*p != '\'')
   1770  1.1  mrg         {
                              // Error recovery: skip ahead to a quote, an end of line or file, or one
                              // of ; ) ] } and decide there which error to report.
   1771  1.1  mrg             while (*p != '\'' && *p != 0x1A && *p != 0 && *p != '\n' &&
   1772  1.1  mrg                     *p != '\r' && *p != ';' && *p != ')' && *p != ']' && *p != '}')
   1773  1.1  mrg             {
   1774  1.1  mrg                 if (*p & 0x80)
   1775  1.1  mrg                 {
                                      // LS/PS also stop the skip; `p` is put back on the start of
                                      // that sequence.
   1776  1.1  mrg                     const s = p;
   1777  1.1  mrg                     c = decodeUTF();
   1778  1.1  mrg                     if (c == LS || c == PS)
   1779  1.1  mrg                     {
   1780  1.1  mrg                         p = s;
   1781  1.1  mrg                         break;
   1782  1.1  mrg                     }
   1783  1.1  mrg                 }
   1784  1.1  mrg                 p++;
   1785  1.1  mrg             }
   1786  1.1  mrg 
   1787  1.1  mrg             if (*p == '\'')
   1788  1.1  mrg             {
   1789  1.1  mrg                 error("character constant has multiple characters");
   1790  1.1  mrg                 p++;
   1791  1.1  mrg             }
   1792  1.1  mrg             else
   1793  1.1  mrg                 error("unterminated character constant");
   1794  1.1  mrg             t.unsvalue = '?';
   1795  1.1  mrg             return tk;
   1796  1.1  mrg         }
                          // Well-formed literal: step over the closing quote.
   1797  1.1  mrg         p++;
   1798  1.1  mrg         return tk;
   1799  1.1  mrg     }
   1800  1.1  mrg 
   1801  1.1  mrg     /***************************************
   1802  1.1  mrg      * Lex C character constant.
   1803  1.1  mrg      * Parser is on the opening quote.
   1804  1.1  mrg      * Params:
   1805  1.1  mrg      *  t = token to fill in
   1806  1.1  mrg      *  prefix = one of `u`, `U` or 0.
   1807  1.1  mrg      * Reference:
   1808  1.1  mrg      *  C11 6.4.4.4
   1809  1.1  mrg      */
   1810  1.1  mrg     private void clexerCharConstant(ref Token t, char prefix)
   1811  1.1  mrg     {
   1812  1.1  mrg         escapeStringConstant(&t);
   1813  1.1  mrg         const(char)[] str = t.ustring[0 .. t.len];
   1814  1.1  mrg         const n = str.length;
   1815  1.1  mrg         const loc = t.loc;
   1816  1.1  mrg         if (n == 0)
   1817  1.1  mrg         {
   1818  1.1  mrg             error(loc, "empty character constant");
   1819  1.1  mrg             t.value = TOK.semicolon;
   1820  1.1  mrg             return;
   1821  1.1  mrg         }
   1822  1.1  mrg 
   1823  1.1  mrg         uint u;
   1824  1.1  mrg         switch (prefix)
   1825  1.1  mrg         {
   1826  1.1  mrg             case 0:
   1827  1.1  mrg                 if (n == 1) // fast case
   1828  1.1  mrg                 {
   1829  1.1  mrg                     u = str[0];
   1830  1.1  mrg                 }
   1831  1.1  mrg                 else if (n > 4)
   1832  1.1  mrg                     error(loc, "max number of chars in character literal is 4, had %d",
   1833  1.1  mrg                         cast(int)n);
   1834  1.1  mrg                 else
   1835  1.1  mrg                 {
   1836  1.1  mrg                     foreach (i, c; str)
   1837  1.1  mrg                         (cast(char*)&u)[n - 1 - i] = c;
   1838  1.1  mrg                 }
   1839  1.1  mrg                 break;
   1840  1.1  mrg 
   1841  1.1  mrg             case 'u':
   1842  1.1  mrg                 dchar d1;
   1843  1.1  mrg                 size_t idx;
   1844  1.1  mrg                 auto msg = utf_decodeChar(str, idx, d1);
   1845  1.1  mrg                 dchar d2 = 0;
   1846  1.1  mrg                 if (idx < n && !msg)
   1847  1.1  mrg                     msg = utf_decodeChar(str, idx, d2);
   1848  1.1  mrg                 if (msg)
   1849  1.1  mrg                     error(loc, "%s", msg);
   1850  1.1  mrg                 else if (idx < n)
   1851  1.1  mrg                     error(loc, "max number of chars in 16 bit character literal is 2, had %d",
   1852  1.1  mrg                         (n + 1) >> 1);
   1853  1.1  mrg                 else if (d1 > 0x1_0000)
   1854  1.1  mrg                     error(loc, "%d does not fit in 16 bits", d1);
   1855  1.1  mrg                 else if (d2 > 0x1_0000)
   1856  1.1  mrg                     error(loc, "%d does not fit in 16 bits", d2);
   1857  1.1  mrg                 u = d1;
   1858  1.1  mrg                 if (d2)
   1859  1.1  mrg                     u = (d1 << 16) | d2;
   1860  1.1  mrg                 break;
   1861  1.1  mrg 
   1862  1.1  mrg             case 'U':
   1863  1.1  mrg                 dchar d;
   1864  1.1  mrg                 size_t idx;
   1865  1.1  mrg                 auto msg = utf_decodeChar(str, idx, d);
   1866  1.1  mrg                 if (msg)
   1867  1.1  mrg                     error(loc, "%s", msg);
   1868  1.1  mrg                 else if (idx < n)
   1869  1.1  mrg                     error(loc, "max number of chars in 32 bit character literal is 1, had %d",
   1870  1.1  mrg                         (n + 3) >> 2);
   1871  1.1  mrg                 u = d;
   1872  1.1  mrg                 break;
   1873  1.1  mrg 
   1874  1.1  mrg             default:
   1875  1.1  mrg                 assert(0);
   1876  1.1  mrg         }
   1877  1.1  mrg         t.value = n == 1 ? TOK.charLiteral : TOK.int32Literal;
   1878  1.1  mrg         t.unsvalue = u;
   1879  1.1  mrg     }
   1880  1.1  mrg 
   1881  1.1  mrg     /***************************************
   1882  1.1  mrg      * Get postfix of string literal.
   1883  1.1  mrg      */
   1884  1.1  mrg     private void stringPostfix(Token* t) pure @nogc
   1885  1.1  mrg     {
   1886  1.1  mrg         switch (*p)
   1887  1.1  mrg         {
   1888  1.1  mrg         case 'c':
   1889  1.1  mrg         case 'w':
   1890  1.1  mrg         case 'd':
   1891  1.1  mrg             t.postfix = *p;
   1892  1.1  mrg             p++;
   1893  1.1  mrg             break;
   1894  1.1  mrg         default:
   1895  1.1  mrg             t.postfix = 0;
   1896  1.1  mrg             break;
   1897  1.1  mrg         }
   1898  1.1  mrg     }
   1899  1.1  mrg 
   1900  1.1  mrg     /**************************************
   1901  1.1  mrg      * Read in a number.
   1902  1.1  mrg      * If it's an integer, store it in tok.TKutok.Vlong.
   1903  1.1  mrg      *      integers can be decimal, octal or hex
   1904  1.1  mrg      *      Handle the suffixes U, UL, LU, L, etc.
   1905  1.1  mrg      * If it's double, store it in tok.TKutok.Vdouble.
   1906  1.1  mrg      * Returns:
   1907  1.1  mrg      *      TKnum
   1908  1.1  mrg      *      TKdouble,...
   1909  1.1  mrg      */
   1910  1.1  mrg     private TOK number(Token* t)
   1911  1.1  mrg     {
   1912  1.1  mrg         int base = 10;
   1913  1.1  mrg         const start = p;
   1914  1.1  mrg         uinteger_t n = 0; // unsigned >=64 bit integer type
   1915  1.1  mrg         int d;
   1916  1.1  mrg         bool err = false;
   1917  1.1  mrg         bool overflow = false;
   1918  1.1  mrg         bool anyBinaryDigitsNoSingleUS = false;
   1919  1.1  mrg         bool anyHexDigitsNoSingleUS = false;
   1920  1.1  mrg         char errorDigit = 0;
   1921  1.1  mrg         dchar c = *p;
   1922  1.1  mrg         if (c == '0')
   1923  1.1  mrg         {
   1924  1.1  mrg             ++p;
   1925  1.1  mrg             c = *p;
   1926  1.1  mrg             switch (c)
   1927  1.1  mrg             {
   1928  1.1  mrg             case '0':
   1929  1.1  mrg             case '1':
   1930  1.1  mrg             case '2':
   1931  1.1  mrg             case '3':
   1932  1.1  mrg             case '4':
   1933  1.1  mrg             case '5':
   1934  1.1  mrg             case '6':
   1935  1.1  mrg             case '7':
   1936  1.1  mrg                 base = 8;
   1937  1.1  mrg                 break;
   1938  1.1  mrg 
   1939  1.1  mrg             case '8':
   1940  1.1  mrg             case '9':
   1941  1.1  mrg                 errorDigit = cast(char) c;
   1942  1.1  mrg                 base = 8;
   1943  1.1  mrg                 break;
   1944  1.1  mrg             case 'x':
   1945  1.1  mrg             case 'X':
   1946  1.1  mrg                 ++p;
   1947  1.1  mrg                 base = 16;
   1948  1.1  mrg                 break;
   1949  1.1  mrg             case 'b':
   1950  1.1  mrg             case 'B':
   1951  1.1  mrg                 if (Ccompile)
   1952  1.1  mrg                     error("binary constants not allowed");
   1953  1.1  mrg                 ++p;
   1954  1.1  mrg                 base = 2;
   1955  1.1  mrg                 break;
   1956  1.1  mrg             case '.':
   1957  1.1  mrg                 if (p[1] == '.')
   1958  1.1  mrg                     goto Ldone; // if ".."
   1959  1.1  mrg                 if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
   1960  1.1  mrg                 {
   1961  1.1  mrg                     if (Ccompile && (p[1] == 'f' || p[1] == 'F' || p[1] == 'l' || p[1] == 'L'))
   1962  1.1  mrg                         goto Lreal;  // if `0.f` or `0.L`
   1963  1.1  mrg                     goto Ldone; // if ".identifier" or ".unicode"
   1964  1.1  mrg                 }
   1965  1.1  mrg                 goto Lreal; // '.' is part of current token
   1966  1.1  mrg             case 'i':
   1967  1.1  mrg             case 'f':
   1968  1.1  mrg             case 'F':
   1969  1.1  mrg                 goto Lreal;
   1970  1.1  mrg             case '_':
   1971  1.1  mrg                 if (Ccompile)
   1972  1.1  mrg                     error("embedded `_` not allowed");
   1973  1.1  mrg                 ++p;
   1974  1.1  mrg                 base = 8;
   1975  1.1  mrg                 break;
   1976  1.1  mrg             case 'L':
   1977  1.1  mrg                 if (p[1] == 'i')
   1978  1.1  mrg                     goto Lreal;
   1979  1.1  mrg                 break;
   1980  1.1  mrg             default:
   1981  1.1  mrg                 break;
   1982  1.1  mrg             }
   1983  1.1  mrg         }
   1984  1.1  mrg         while (1)
   1985  1.1  mrg         {
   1986  1.1  mrg             c = *p;
   1987  1.1  mrg             switch (c)
   1988  1.1  mrg             {
   1989  1.1  mrg             case '0':
   1990  1.1  mrg             case '1':
   1991  1.1  mrg             case '2':
   1992  1.1  mrg             case '3':
   1993  1.1  mrg             case '4':
   1994  1.1  mrg             case '5':
   1995  1.1  mrg             case '6':
   1996  1.1  mrg             case '7':
   1997  1.1  mrg             case '8':
   1998  1.1  mrg             case '9':
   1999  1.1  mrg                 ++p;
   2000  1.1  mrg                 d = c - '0';
   2001  1.1  mrg                 break;
   2002  1.1  mrg             case 'a':
   2003  1.1  mrg             case 'b':
   2004  1.1  mrg             case 'c':
   2005  1.1  mrg             case 'd':
   2006  1.1  mrg             case 'e':
   2007  1.1  mrg             case 'f':
   2008  1.1  mrg             case 'A':
   2009  1.1  mrg             case 'B':
   2010  1.1  mrg             case 'C':
   2011  1.1  mrg             case 'D':
   2012  1.1  mrg             case 'E':
   2013  1.1  mrg             case 'F':
   2014  1.1  mrg                 ++p;
   2015  1.1  mrg                 if (base != 16)
   2016  1.1  mrg                 {
   2017  1.1  mrg                     if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
   2018  1.1  mrg                         goto Lreal;
   2019  1.1  mrg                 }
   2020  1.1  mrg                 if (c >= 'a')
   2021  1.1  mrg                     d = c + 10 - 'a';
   2022  1.1  mrg                 else
   2023  1.1  mrg                     d = c + 10 - 'A';
   2024  1.1  mrg                 break;
   2025  1.1  mrg             case 'L':
   2026  1.1  mrg                 if (p[1] == 'i')
   2027  1.1  mrg                     goto Lreal;
   2028  1.1  mrg                 goto Ldone;
   2029  1.1  mrg             case '.':
   2030  1.1  mrg                 if (p[1] == '.')
   2031  1.1  mrg                     goto Ldone; // if ".."
   2032  1.1  mrg                 if (base <= 10 && n > 0 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
   2033  1.1  mrg                 {
   2034  1.1  mrg                     if (Ccompile && base == 10 &&
   2035  1.1  mrg                         (p[1] == 'e' || p[1] == 'E' || p[1] == 'f' || p[1] == 'F' || p[1] == 'l' || p[1] == 'L'))
   2036  1.1  mrg                         goto Lreal;  // if `1.e6` or `1.f` or `1.L`
   2037  1.1  mrg                     goto Ldone; // if ".identifier" or ".unicode"
   2038  1.1  mrg                 }
   2039  1.1  mrg                 if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
   2040  1.1  mrg                     goto Ldone; // if ".identifier" or ".unicode"
   2041  1.1  mrg                 if (base == 2)
   2042  1.1  mrg                     goto Ldone; // if ".identifier" or ".unicode"
   2043  1.1  mrg                 goto Lreal; // otherwise as part of a floating point literal
   2044  1.1  mrg             case 'p':
   2045  1.1  mrg             case 'P':
   2046  1.1  mrg             case 'i':
   2047  1.1  mrg             Lreal:
   2048  1.1  mrg                 p = start;
   2049  1.1  mrg                 return inreal(t);
   2050  1.1  mrg             case '_':
   2051  1.1  mrg                 if (Ccompile)
   2052  1.1  mrg                     goto default;
   2053  1.1  mrg                 ++p;
   2054  1.1  mrg                 continue;
   2055  1.1  mrg             default:
   2056  1.1  mrg                 goto Ldone;
   2057  1.1  mrg             }
   2058  1.1  mrg             // got a digit here, set any necessary flags, check for errors
   2059  1.1  mrg             anyHexDigitsNoSingleUS = true;
   2060  1.1  mrg             anyBinaryDigitsNoSingleUS = true;
   2061  1.1  mrg             if (!errorDigit && d >= base)
   2062  1.1  mrg             {
   2063  1.1  mrg                 errorDigit = cast(char) c;
   2064  1.1  mrg             }
   2065  1.1  mrg             // Avoid expensive overflow check if we aren't at risk of overflow
   2066  1.1  mrg             if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
   2067  1.1  mrg                 n = n * base + d;
   2068  1.1  mrg             else
   2069  1.1  mrg             {
   2070  1.1  mrg                 import core.checkedint : mulu, addu;
   2071  1.1  mrg 
   2072  1.1  mrg                 n = mulu(n, base, overflow);
   2073  1.1  mrg                 n = addu(n, d, overflow);
   2074  1.1  mrg             }
   2075  1.1  mrg         }
   2076  1.1  mrg     Ldone:
   2077  1.1  mrg         if (errorDigit)
   2078  1.1  mrg         {
   2079  1.1  mrg             error("%s digit expected, not `%c`", base == 2 ? "binary".ptr :
   2080  1.1  mrg                                                  base == 8 ? "octal".ptr :
   2081  1.1  mrg                                                  "decimal".ptr, errorDigit);
   2082  1.1  mrg             err = true;
   2083  1.1  mrg         }
   2084  1.1  mrg         if (overflow && !err)
   2085  1.1  mrg         {
   2086  1.1  mrg             error("integer overflow");
   2087  1.1  mrg             err = true;
   2088  1.1  mrg         }
   2089  1.1  mrg         if ((base == 2 && !anyBinaryDigitsNoSingleUS) ||
   2090  1.1  mrg             (base == 16 && !anyHexDigitsNoSingleUS))
   2091  1.1  mrg             error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);
   2092  1.1  mrg 
   2093  1.1  mrg         t.unsvalue = n;
   2094  1.1  mrg 
   2095  1.1  mrg         if (Ccompile)
   2096  1.1  mrg             return cnumber(base, n);
   2097  1.1  mrg 
   2098  1.1  mrg         enum FLAGS : int
   2099  1.1  mrg         {
   2100  1.1  mrg             none = 0,
   2101  1.1  mrg             decimal = 1, // decimal
   2102  1.1  mrg             unsigned = 2, // u or U suffix
   2103  1.1  mrg             long_ = 4, // L suffix
   2104  1.1  mrg         }
   2105  1.1  mrg 
   2106  1.1  mrg         FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
   2107  1.1  mrg         // Parse trailing 'u', 'U', 'l' or 'L' in any combination
   2108  1.1  mrg         const psuffix = p;
   2109  1.1  mrg         while (1)
   2110  1.1  mrg         {
   2111  1.1  mrg             FLAGS f;
   2112  1.1  mrg             switch (*p)
   2113  1.1  mrg             {
   2114  1.1  mrg             case 'U':
   2115  1.1  mrg             case 'u':
   2116  1.1  mrg                 f = FLAGS.unsigned;
   2117  1.1  mrg                 goto L1;
   2118  1.1  mrg             case 'l':
   2119  1.1  mrg                 f = FLAGS.long_;
   2120  1.1  mrg                 error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
   2121  1.1  mrg                 goto L1;
   2122  1.1  mrg             case 'L':
   2123  1.1  mrg                 f = FLAGS.long_;
   2124  1.1  mrg             L1:
   2125  1.1  mrg                 p++;
   2126  1.1  mrg                 if ((flags & f) && !err)
   2127  1.1  mrg                 {
   2128  1.1  mrg                     error("unrecognized token");
   2129  1.1  mrg                     err = true;
   2130  1.1  mrg                 }
   2131  1.1  mrg                 flags = cast(FLAGS)(flags | f);
   2132  1.1  mrg                 continue;
   2133  1.1  mrg             default:
   2134  1.1  mrg                 break;
   2135  1.1  mrg             }
   2136  1.1  mrg             break;
   2137  1.1  mrg         }
   2138  1.1  mrg         if (base == 8 && n >= 8)
   2139  1.1  mrg         {
   2140  1.1  mrg             if (err)
   2141  1.1  mrg                 // can't translate invalid octal value, just show a generic message
   2142  1.1  mrg                 error("octal literals larger than 7 are no longer supported");
   2143  1.1  mrg             else
   2144  1.1  mrg                 error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!\"%llo%.*s\"` instead",
   2145  1.1  mrg                     n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
   2146  1.1  mrg         }
   2147  1.1  mrg         TOK result;
   2148  1.1  mrg         switch (flags)
   2149  1.1  mrg         {
   2150  1.1  mrg         case FLAGS.none:
   2151  1.1  mrg             /* Octal or Hexadecimal constant.
   2152  1.1  mrg              * First that fits: int, uint, long, ulong
   2153  1.1  mrg              */
   2154  1.1  mrg             if (n & 0x8000000000000000L)
   2155  1.1  mrg                 result = TOK.uns64Literal;
   2156  1.1  mrg             else if (n & 0xFFFFFFFF00000000L)
   2157  1.1  mrg                 result = TOK.int64Literal;
   2158  1.1  mrg             else if (n & 0x80000000)
   2159  1.1  mrg                 result = TOK.uns32Literal;
   2160  1.1  mrg             else
   2161  1.1  mrg                 result = TOK.int32Literal;
   2162  1.1  mrg             break;
   2163  1.1  mrg         case FLAGS.decimal:
   2164  1.1  mrg             /* First that fits: int, long, long long
   2165  1.1  mrg              */
   2166  1.1  mrg             if (n & 0x8000000000000000L)
   2167  1.1  mrg             {
   2168  1.1  mrg                 result = TOK.uns64Literal;
   2169  1.1  mrg             }
   2170  1.1  mrg             else if (n & 0xFFFFFFFF80000000L)
   2171  1.1  mrg                 result = TOK.int64Literal;
   2172  1.1  mrg             else
   2173  1.1  mrg                 result = TOK.int32Literal;
   2174  1.1  mrg             break;
   2175  1.1  mrg         case FLAGS.unsigned:
   2176  1.1  mrg         case FLAGS.decimal | FLAGS.unsigned:
   2177  1.1  mrg             /* First that fits: uint, ulong
   2178  1.1  mrg              */
   2179  1.1  mrg             if (n & 0xFFFFFFFF00000000L)
   2180  1.1  mrg                 result = TOK.uns64Literal;
   2181  1.1  mrg             else
   2182  1.1  mrg                 result = TOK.uns32Literal;
   2183  1.1  mrg             break;
   2184  1.1  mrg         case FLAGS.decimal | FLAGS.long_:
   2185  1.1  mrg             if (n & 0x8000000000000000L)
   2186  1.1  mrg             {
   2187  1.1  mrg                 if (!err)
   2188  1.1  mrg                 {
   2189  1.1  mrg                     error("signed integer overflow");
   2190  1.1  mrg                     err = true;
   2191  1.1  mrg                 }
   2192  1.1  mrg                 result = TOK.uns64Literal;
   2193  1.1  mrg             }
   2194  1.1  mrg             else
   2195  1.1  mrg                 result = TOK.int64Literal;
   2196  1.1  mrg             break;
   2197  1.1  mrg         case FLAGS.long_:
   2198  1.1  mrg             if (n & 0x8000000000000000L)
   2199  1.1  mrg                 result = TOK.uns64Literal;
   2200  1.1  mrg             else
   2201  1.1  mrg                 result = TOK.int64Literal;
   2202  1.1  mrg             break;
   2203  1.1  mrg         case FLAGS.unsigned | FLAGS.long_:
   2204  1.1  mrg         case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
   2205  1.1  mrg             result = TOK.uns64Literal;
   2206  1.1  mrg             break;
   2207  1.1  mrg         default:
   2208  1.1  mrg             debug
   2209  1.1  mrg             {
   2210  1.1  mrg                 printf("%x\n", flags);
   2211  1.1  mrg             }
   2212  1.1  mrg             assert(0);
   2213  1.1  mrg         }
   2214  1.1  mrg         return result;
   2215  1.1  mrg     }
   2216  1.1  mrg 
   2217  1.1  mrg     /**************************************
   2218  1.1  mrg      * Lex C integer-suffix
   2219  1.1  mrg      * Params:
   2220  1.1  mrg      *  base = number base
   2221  1.1  mrg      *  n = raw integer value
   2222  1.1  mrg      * Returns:
   2223  1.1  mrg      *  token value
   2224  1.1  mrg      */
   2225  1.1  mrg     private TOK cnumber(int base, uinteger_t n)
   2226  1.1  mrg     {
   2227  1.1  mrg         /* C11 6.4.4.1
   2228  1.1  mrg          * Parse trailing suffixes:
   2229  1.1  mrg          *   u or U
   2230  1.1  mrg          *   l or L
   2231  1.1  mrg          *   ll or LL
   2232  1.1  mrg          */
   2233  1.1  mrg         enum FLAGS : uint
   2234  1.1  mrg         {
   2235  1.1  mrg             octalhex = 1, // octal or hexadecimal
   2236  1.1  mrg             decimal  = 2, // decimal
   2237  1.1  mrg             unsigned = 4, // u or U suffix
   2238  1.1  mrg             long_    = 8, // l or L suffix
   2239  1.1  mrg             llong    = 0x10 // ll or LL
   2240  1.1  mrg         }
   2241  1.1  mrg         FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.octalhex;
   2242  1.1  mrg         bool err;
   2243  1.1  mrg     Lsuffixes:
   2244  1.1  mrg         while (1)
   2245  1.1  mrg         {
   2246  1.1  mrg             FLAGS f;
   2247  1.1  mrg             const cs = *p;
   2248  1.1  mrg             switch (cs)
   2249  1.1  mrg             {
   2250  1.1  mrg                 case 'U':
   2251  1.1  mrg                 case 'u':
   2252  1.1  mrg                     f = FLAGS.unsigned;
   2253  1.1  mrg                     break;
   2254  1.1  mrg 
   2255  1.1  mrg                 case 'l':
   2256  1.1  mrg                 case 'L':
   2257  1.1  mrg                     f = FLAGS.long_;
   2258  1.1  mrg                     if (cs == p[1])
   2259  1.1  mrg                     {
   2260  1.1  mrg                         f = FLAGS.long_ | FLAGS.llong;
   2261  1.1  mrg                         ++p;
   2262  1.1  mrg                     }
   2263  1.1  mrg                     break;
   2264  1.1  mrg 
   2265  1.1  mrg                 default:
   2266  1.1  mrg                     break Lsuffixes;
   2267  1.1  mrg             }
   2268  1.1  mrg             ++p;
   2269  1.1  mrg             if ((flags & f) && !err)
   2270  1.1  mrg             {
   2271  1.1  mrg                 error("duplicate integer suffixes");
   2272  1.1  mrg                 err = true;
   2273  1.1  mrg             }
   2274  1.1  mrg             flags = cast(FLAGS)(flags | f);
   2275  1.1  mrg         }
   2276  1.1  mrg 
   2277  1.1  mrg         TOK result = TOK.int32Literal;     // default
   2278  1.1  mrg         switch (flags)
   2279  1.1  mrg         {
   2280  1.1  mrg             /* Since D doesn't have a variable sized `long` or `unsigned long` type,
   2281  1.1  mrg              * this code deviates from C by picking D int, uint, long, or ulong instead
   2282  1.1  mrg              */
   2283  1.1  mrg 
   2284  1.1  mrg             case FLAGS.octalhex:
   2285  1.1  mrg                 /* Octal or Hexadecimal constant.
   2286  1.1  mrg                  * First that fits: int, unsigned, long, unsigned long,
   2287  1.1  mrg                  * long long, unsigned long long
   2288  1.1  mrg                  */
   2289  1.1  mrg                 if (n & 0x8000000000000000L)
   2290  1.1  mrg                     result = TOK.uns64Literal;      // unsigned long
   2291  1.1  mrg                 else if (n & 0xFFFFFFFF00000000L)
   2292  1.1  mrg                     result = TOK.int64Literal;      // long
   2293  1.1  mrg                 else if (n & 0x80000000)
   2294  1.1  mrg                     result = TOK.uns32Literal;
   2295  1.1  mrg                 else
   2296  1.1  mrg                     result = TOK.int32Literal;
   2297  1.1  mrg                 break;
   2298  1.1  mrg 
   2299  1.1  mrg             case FLAGS.decimal:
   2300  1.1  mrg                 /* First that fits: int, long, long long
   2301  1.1  mrg                  */
   2302  1.1  mrg                 if (n & 0x8000000000000000L)
   2303  1.1  mrg                     result = TOK.uns64Literal;      // unsigned long
   2304  1.1  mrg                 else if (n & 0xFFFFFFFF80000000L)
   2305  1.1  mrg                     result = TOK.int64Literal;      // long
   2306  1.1  mrg                 else
   2307  1.1  mrg                     result = TOK.int32Literal;
   2308  1.1  mrg                 break;
   2309  1.1  mrg 
   2310  1.1  mrg             case FLAGS.octalhex | FLAGS.unsigned:
   2311  1.1  mrg             case FLAGS.decimal | FLAGS.unsigned:
   2312  1.1  mrg                 /* First that fits: unsigned, unsigned long, unsigned long long
   2313  1.1  mrg                  */
   2314  1.1  mrg                 if (n & 0xFFFFFFFF00000000L)
   2315  1.1  mrg                     result = TOK.uns64Literal;      // unsigned long
   2316  1.1  mrg                 else
   2317  1.1  mrg                     result = TOK.uns32Literal;
   2318  1.1  mrg                 break;
   2319  1.1  mrg 
   2320  1.1  mrg             case FLAGS.decimal | FLAGS.long_:
   2321  1.1  mrg                 /* First that fits: long, long long
   2322  1.1  mrg                  */
   2323  1.1  mrg                 if (longsize == 4 || long_longsize == 4)
   2324  1.1  mrg                 {
   2325  1.1  mrg                     if (n & 0xFFFFFFFF_80000000L)
   2326  1.1  mrg                         result = TOK.int64Literal;
   2327  1.1  mrg                     else
   2328  1.1  mrg                         result = TOK.int32Literal;  // long
   2329  1.1  mrg                 }
   2330  1.1  mrg                 else
   2331  1.1  mrg                 {
   2332  1.1  mrg                     result = TOK.int64Literal;      // long
   2333  1.1  mrg                 }
   2334  1.1  mrg                 break;
   2335  1.1  mrg 
   2336  1.1  mrg             case FLAGS.octalhex | FLAGS.long_:
   2337  1.1  mrg                 /* First that fits: long, unsigned long, long long,
   2338  1.1  mrg                  * unsigned long long
   2339  1.1  mrg                  */
   2340  1.1  mrg                 if (longsize == 4 || long_longsize == 4)
   2341  1.1  mrg                 {
   2342  1.1  mrg                     if (n & 0x8000000000000000L)
   2343  1.1  mrg                         result = TOK.uns64Literal;
   2344  1.1  mrg                     else if (n & 0xFFFFFFFF00000000L)
   2345  1.1  mrg                         result = TOK.int64Literal;
   2346  1.1  mrg                     else if (n & 0x80000000)
   2347  1.1  mrg                         result = TOK.uns32Literal;      // unsigned long
   2348  1.1  mrg                     else
   2349  1.1  mrg                         result = TOK.int32Literal;      // long
   2350  1.1  mrg                 }
   2351  1.1  mrg                 else
   2352  1.1  mrg                 {
   2353  1.1  mrg                     if (n & 0x80000000_00000000L)
   2354  1.1  mrg                         result = TOK.uns64Literal;      // unsigned long
   2355  1.1  mrg                     else
   2356  1.1  mrg                         result = TOK.int64Literal;      // long
   2357  1.1  mrg                 }
   2358  1.1  mrg                 break;
   2359  1.1  mrg 
   2360  1.1  mrg             case FLAGS.octalhex | FLAGS.unsigned | FLAGS.long_:
   2361  1.1  mrg             case FLAGS.decimal  | FLAGS.unsigned | FLAGS.long_:
   2362  1.1  mrg                 /* First that fits: unsigned long, unsigned long long
   2363  1.1  mrg                  */
   2364  1.1  mrg                 if (longsize == 4 || long_longsize == 4)
   2365  1.1  mrg                 {
   2366  1.1  mrg                     if (n & 0xFFFFFFFF00000000L)
   2367  1.1  mrg                         result = TOK.uns64Literal;
   2368  1.1  mrg                     else
   2369  1.1  mrg                         result = TOK.uns32Literal;      // unsigned long
   2370  1.1  mrg                 }
   2371  1.1  mrg                 else
   2372  1.1  mrg                 {
   2373  1.1  mrg                     result = TOK.uns64Literal;  // unsigned long
   2374  1.1  mrg                 }
   2375  1.1  mrg                 break;
   2376  1.1  mrg 
   2377  1.1  mrg             case FLAGS.octalhex | FLAGS.long_ | FLAGS.llong:
   2378  1.1  mrg                 /* First that fits: long long, unsigned long long
   2379  1.1  mrg                  */
   2380  1.1  mrg                 if (n & 0x8000000000000000L)
   2381  1.1  mrg                     result = TOK.uns64Literal;
   2382  1.1  mrg                 else
   2383  1.1  mrg                     result = TOK.int64Literal;
   2384  1.1  mrg                 break;
   2385  1.1  mrg 
   2386  1.1  mrg             case FLAGS.decimal | FLAGS.long_ | FLAGS.llong:
   2387  1.1  mrg                 /* long long
   2388  1.1  mrg                  */
   2389  1.1  mrg                 result = TOK.int64Literal;
   2390  1.1  mrg                 break;
   2391  1.1  mrg 
   2392  1.1  mrg             case FLAGS.octalhex | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong:
   2393  1.1  mrg             case FLAGS.decimal  | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong:
   2394  1.1  mrg                 result = TOK.uns64Literal;
   2395  1.1  mrg                 break;
   2396  1.1  mrg 
   2397  1.1  mrg             default:
   2398  1.1  mrg                 debug printf("%x\n",flags);
   2399  1.1  mrg                 assert(0);
   2400  1.1  mrg         }
   2401  1.1  mrg         return result;
   2402  1.1  mrg     }
   2403  1.1  mrg 
   2404  1.1  mrg     /**************************************
   2405  1.1  mrg      * Read in characters, converting them to real.
   2406  1.1  mrg      * Bugs:
   2407  1.1  mrg      *      Exponent overflow not detected.
   2408  1.1  mrg      *      Too much requested precision is not detected.
   2409  1.1  mrg      */
   2410  1.1  mrg     private TOK inreal(Token* t)
   2411  1.1  mrg     {
   2412  1.1  mrg         //printf("Lexer::inreal()\n");
   2413  1.1  mrg         debug
   2414  1.1  mrg         {
   2415  1.1  mrg             assert(*p == '.' || isdigit(*p));
   2416  1.1  mrg         }
   2417  1.1  mrg         bool isWellformedString = true;
   2418  1.1  mrg         stringbuffer.setsize(0);
   2419  1.1  mrg         auto pstart = p;
   2420  1.1  mrg         bool hex = false;
   2421  1.1  mrg         dchar c = *p++;
   2422  1.1  mrg         // Leading '0x'
   2423  1.1  mrg         if (c == '0')
   2424  1.1  mrg         {
   2425  1.1  mrg             c = *p++;
   2426  1.1  mrg             if (c == 'x' || c == 'X')
   2427  1.1  mrg             {
   2428  1.1  mrg                 hex = true;
   2429  1.1  mrg                 c = *p++;
   2430  1.1  mrg             }
   2431  1.1  mrg         }
   2432  1.1  mrg         // Digits to left of '.'
   2433  1.1  mrg         while (1)
   2434  1.1  mrg         {
   2435  1.1  mrg             if (c == '.')
   2436  1.1  mrg             {
   2437  1.1  mrg                 c = *p++;
   2438  1.1  mrg                 break;
   2439  1.1  mrg             }
   2440  1.1  mrg             if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
   2441  1.1  mrg             {
   2442  1.1  mrg                 c = *p++;
   2443  1.1  mrg                 continue;
   2444  1.1  mrg             }
   2445  1.1  mrg             break;
   2446  1.1  mrg         }
   2447  1.1  mrg         // Digits to right of '.'
   2448  1.1  mrg         while (1)
   2449  1.1  mrg         {
   2450  1.1  mrg             if (isdigit(c) || (hex && isxdigit(c)) || c == '_')
   2451  1.1  mrg             {
   2452  1.1  mrg                 c = *p++;
   2453  1.1  mrg                 continue;
   2454  1.1  mrg             }
   2455  1.1  mrg             break;
   2456  1.1  mrg         }
   2457  1.1  mrg         if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P')))
   2458  1.1  mrg         {
   2459  1.1  mrg             c = *p++;
   2460  1.1  mrg             if (c == '-' || c == '+')
   2461  1.1  mrg             {
   2462  1.1  mrg                 c = *p++;
   2463  1.1  mrg             }
   2464  1.1  mrg             bool anyexp = false;
   2465  1.1  mrg             while (1)
   2466  1.1  mrg             {
   2467  1.1  mrg                 if (isdigit(c))
   2468  1.1  mrg                 {
   2469  1.1  mrg                     anyexp = true;
   2470  1.1  mrg                     c = *p++;
   2471  1.1  mrg                     continue;
   2472  1.1  mrg                 }
   2473  1.1  mrg                 if (c == '_')
   2474  1.1  mrg                 {
   2475  1.1  mrg                     if (Ccompile)
   2476  1.1  mrg                         error("embedded `_` in numeric literals not allowed");
   2477  1.1  mrg                     c = *p++;
   2478  1.1  mrg                     continue;
   2479  1.1  mrg                 }
   2480  1.1  mrg                 if (!anyexp)
   2481  1.1  mrg                 {
   2482  1.1  mrg                     error("missing exponent");
   2483  1.1  mrg                     isWellformedString = false;
   2484  1.1  mrg                 }
   2485  1.1  mrg                 break;
   2486  1.1  mrg             }
   2487  1.1  mrg         }
   2488  1.1  mrg         else if (hex)
   2489  1.1  mrg         {
   2490  1.1  mrg             error("exponent required for hex float");
   2491  1.1  mrg             isWellformedString = false;
   2492  1.1  mrg         }
   2493  1.1  mrg         --p;
   2494  1.1  mrg         while (pstart < p)
   2495  1.1  mrg         {
   2496  1.1  mrg             if (*pstart != '_')
   2497  1.1  mrg                 stringbuffer.writeByte(*pstart);
   2498  1.1  mrg             ++pstart;
   2499  1.1  mrg         }
   2500  1.1  mrg         stringbuffer.writeByte(0);
   2501  1.1  mrg         auto sbufptr = cast(const(char)*)stringbuffer[].ptr;
   2502  1.1  mrg         TOK result;
   2503  1.1  mrg         bool isOutOfRange = false;
   2504  1.1  mrg         t.floatvalue = (isWellformedString ? CTFloat.parse(sbufptr, &isOutOfRange) : CTFloat.zero);
   2505  1.1  mrg         switch (*p)
   2506  1.1  mrg         {
   2507  1.1  mrg         case 'F':
   2508  1.1  mrg         case 'f':
   2509  1.1  mrg             if (isWellformedString && !isOutOfRange)
   2510  1.1  mrg                 isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr);
   2511  1.1  mrg             result = TOK.float32Literal;
   2512  1.1  mrg             p++;
   2513  1.1  mrg             break;
   2514  1.1  mrg         default:
   2515  1.1  mrg             if (isWellformedString && !isOutOfRange)
   2516  1.1  mrg                 isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr);
   2517  1.1  mrg             result = TOK.float64Literal;
   2518  1.1  mrg             break;
   2519  1.1  mrg         case 'l':
   2520  1.1  mrg             if (!Ccompile)
   2521  1.1  mrg                 error("use 'L' suffix instead of 'l'");
   2522  1.1  mrg             goto case 'L';
   2523  1.1  mrg         case 'L':
   2524  1.1  mrg             ++p;
   2525  1.1  mrg             if (Ccompile && long_doublesize == 8)
   2526  1.1  mrg                 goto default;
   2527  1.1  mrg             result = TOK.float80Literal;
   2528  1.1  mrg             break;
   2529  1.1  mrg         }
   2530  1.1  mrg         if ((*p == 'i' || *p == 'I') && !Ccompile)
   2531  1.1  mrg         {
   2532  1.1  mrg             if (*p == 'I')
   2533  1.1  mrg                 error("use 'i' suffix instead of 'I'");
   2534  1.1  mrg             p++;
   2535  1.1  mrg             switch (result)
   2536  1.1  mrg             {
   2537  1.1  mrg             case TOK.float32Literal:
   2538  1.1  mrg                 result = TOK.imaginary32Literal;
   2539  1.1  mrg                 break;
   2540  1.1  mrg             case TOK.float64Literal:
   2541  1.1  mrg                 result = TOK.imaginary64Literal;
   2542  1.1  mrg                 break;
   2543  1.1  mrg             case TOK.float80Literal:
   2544  1.1  mrg                 result = TOK.imaginary80Literal;
   2545  1.1  mrg                 break;
   2546  1.1  mrg             default:
   2547  1.1  mrg                 break;
   2548  1.1  mrg             }
   2549  1.1  mrg         }
   2550  1.1  mrg         const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal);
   2551  1.1  mrg         if (isOutOfRange && !isLong && (!Ccompile || hex))
   2552  1.1  mrg         {
   2553  1.1  mrg             /* C11 6.4.4.2 doesn't actually care if it is not representable if it is not hex
   2554  1.1  mrg              */
   2555  1.1  mrg             const char* suffix = (result == TOK.float32Literal || result == TOK.imaginary32Literal) ? "f" : "";
   2556  1.1  mrg             error(scanloc, "number `%s%s` is not representable", sbufptr, suffix);
   2557  1.1  mrg         }
   2558  1.1  mrg         debug
   2559  1.1  mrg         {
   2560  1.1  mrg             switch (result)
   2561  1.1  mrg             {
   2562  1.1  mrg             case TOK.float32Literal:
   2563  1.1  mrg             case TOK.float64Literal:
   2564  1.1  mrg             case TOK.float80Literal:
   2565  1.1  mrg             case TOK.imaginary32Literal:
   2566  1.1  mrg             case TOK.imaginary64Literal:
   2567  1.1  mrg             case TOK.imaginary80Literal:
   2568  1.1  mrg                 break;
   2569  1.1  mrg             default:
   2570  1.1  mrg                 assert(0);
   2571  1.1  mrg             }
   2572  1.1  mrg         }
   2573  1.1  mrg         return result;
   2574  1.1  mrg     }
   2575  1.1  mrg 
   2576  1.1  mrg     final Loc loc() pure @nogc
   2577  1.1  mrg     {
   2578  1.1  mrg         scanloc.charnum = cast(uint)(1 + p - line);
   2579  1.1  mrg         version (LocOffset)
   2580  1.1  mrg             scanloc.fileOffset = cast(uint)(p - base);
   2581  1.1  mrg         return scanloc;
   2582  1.1  mrg     }
   2583  1.1  mrg 
   2584  1.1  mrg     final void error(const(char)* format, ...)
   2585  1.1  mrg     {
   2586  1.1  mrg         va_list args;
   2587  1.1  mrg         va_start(args, format);
   2588  1.1  mrg         .verror(token.loc, format, args);
   2589  1.1  mrg         va_end(args);
   2590  1.1  mrg     }
   2591  1.1  mrg 
   2592  1.1  mrg     final void error(const ref Loc loc, const(char)* format, ...)
   2593  1.1  mrg     {
   2594  1.1  mrg         va_list args;
   2595  1.1  mrg         va_start(args, format);
   2596  1.1  mrg         .verror(loc, format, args);
   2597  1.1  mrg         va_end(args);
   2598  1.1  mrg     }
   2599  1.1  mrg 
   2600  1.1  mrg     final void deprecation(const(char)* format, ...)
   2601  1.1  mrg     {
   2602  1.1  mrg         va_list args;
   2603  1.1  mrg         va_start(args, format);
   2604  1.1  mrg         .vdeprecation(token.loc, format, args);
   2605  1.1  mrg         va_end(args);
   2606  1.1  mrg     }
   2607  1.1  mrg 
   2608  1.1  mrg     /***************************************
   2609  1.1  mrg      * Parse special token sequence:
   2610  1.1  mrg      * Returns:
   2611  1.1  mrg      *  true if the special token sequence was handled
   2612  1.1  mrg      * References:
   2613  1.1  mrg      *  https://dlang.org/spec/lex.html#special-token-sequence
   2614  1.1  mrg      */
   2615  1.1  mrg     bool parseSpecialTokenSequence()
   2616  1.1  mrg     {
   2617  1.1  mrg         Token n;
   2618  1.1  mrg         scan(&n);
   2619  1.1  mrg         if (n.value == TOK.identifier)
   2620  1.1  mrg         {
   2621  1.1  mrg             if (n.ident == Id.line)
   2622  1.1  mrg             {
   2623  1.1  mrg                 poundLine(n, false);
   2624  1.1  mrg                 return true;
   2625  1.1  mrg             }
   2626  1.1  mrg             else
   2627  1.1  mrg             {
   2628  1.1  mrg                 const locx = loc();
   2629  1.1  mrg                 warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars());
   2630  1.1  mrg             }
   2631  1.1  mrg         }
   2632  1.1  mrg         else if (n.value == TOK.if_)
   2633  1.1  mrg         {
   2634  1.1  mrg             error("C preprocessor directive `#if` is not supported, use `version` or `static if`");
   2635  1.1  mrg         }
   2636  1.1  mrg         return false;
   2637  1.1  mrg     }
   2638  1.1  mrg 
   2639  1.1  mrg     /*********************************************
   2640  1.1  mrg      * Parse line/file preprocessor directive:
   2641  1.1  mrg      *    #line linnum [filespec]
   2642  1.1  mrg      * Allow __LINE__ for linnum, and __FILE__ for filespec.
   2643  1.1  mrg      * Accept linemarker format:
   2644  1.1  mrg      *    # linnum [filespec] {flags}
   2645  1.1  mrg      * There can be zero or more flags, which are one of the digits 1..4, and
   2646  1.1  mrg      * must be in ascending order. The flags are ignored.
   2647  1.1  mrg      * Params:
   2648  1.1  mrg      *  tok = token we're on, which is linnum of linemarker
   2649  1.1  mrg      *  linemarker = true if line marker format and lexer is on linnum
   2650  1.1  mrg      * References:
   2651  1.1  mrg      *  linemarker https://gcc.gnu.org/onlinedocs/gcc-11.1.0/cpp/Preprocessor-Output.html
   2652  1.1  mrg      */
   2653  1.1  mrg     final void poundLine(ref Token tok, bool linemarker)
   2654  1.1  mrg     {
   2655  1.1  mrg         auto linnum = this.scanloc.linnum;
   2656  1.1  mrg         const(char)* filespec = null;
   2657  1.1  mrg         bool flags;
   2658  1.1  mrg 
   2659  1.1  mrg         if (!linemarker)
   2660  1.1  mrg             scan(&tok);
   2661  1.1  mrg         if (tok.value == TOK.int32Literal || tok.value == TOK.int64Literal)
   2662  1.1  mrg         {
   2663  1.1  mrg             const lin = cast(int)(tok.unsvalue);
   2664  1.1  mrg             if (lin != tok.unsvalue)
   2665  1.1  mrg             {
   2666  1.1  mrg                 error(tok.loc, "line number `%lld` out of range", cast(ulong)tok.unsvalue);
   2667  1.1  mrg                 skipToNextLine();
   2668  1.1  mrg                 return;
   2669  1.1  mrg             }
   2670  1.1  mrg             else
   2671  1.1  mrg                 linnum = lin;
   2672  1.1  mrg         }
   2673  1.1  mrg         else if (tok.value == TOK.line)  // #line __LINE__
   2674  1.1  mrg         {
   2675  1.1  mrg         }
   2676  1.1  mrg         else
   2677  1.1  mrg         {
   2678  1.1  mrg             error(tok.loc, "positive integer argument expected following `#line`");
   2679  1.1  mrg             if (tok.value != TOK.endOfLine)
   2680  1.1  mrg                 skipToNextLine();
   2681  1.1  mrg             return;
   2682  1.1  mrg         }
   2683  1.1  mrg         while (1)
   2684  1.1  mrg         {
   2685  1.1  mrg             scan(&tok);
   2686  1.1  mrg             switch (tok.value)
   2687  1.1  mrg             {
   2688  1.1  mrg             case TOK.endOfFile:
   2689  1.1  mrg             case TOK.endOfLine:
   2690  1.1  mrg                 if (!inTokenStringConstant)
   2691  1.1  mrg                 {
   2692  1.1  mrg                     this.scanloc.linnum = linnum;
   2693  1.1  mrg                     if (filespec)
   2694  1.1  mrg                         this.scanloc.filename = filespec;
   2695  1.1  mrg                 }
   2696  1.1  mrg                 return;
   2697  1.1  mrg             case TOK.file:
   2698  1.1  mrg                 if (filespec || flags)
   2699  1.1  mrg                     goto Lerr;
   2700  1.1  mrg                 filespec = mem.xstrdup(scanloc.filename);
   2701  1.1  mrg                 continue;
   2702  1.1  mrg             case TOK.string_:
   2703  1.1  mrg                 if (filespec || flags)
   2704  1.1  mrg                     goto Lerr;
   2705  1.1  mrg                 if (tok.ptr[0] != '"' || tok.postfix != 0)
   2706  1.1  mrg                     goto Lerr;
   2707  1.1  mrg                 filespec = tok.ustring;
   2708  1.1  mrg                 continue;
   2709  1.1  mrg             case TOK.int32Literal:
   2710  1.1  mrg                 if (!filespec)
   2711  1.1  mrg                     goto Lerr;
   2712  1.1  mrg                 if (linemarker && tok.unsvalue >= 1 && tok.unsvalue <= 4)
   2713  1.1  mrg                 {
   2714  1.1  mrg                     flags = true;   // linemarker flags seen
   2715  1.1  mrg                     continue;
   2716  1.1  mrg                 }
   2717  1.1  mrg                 goto Lerr;
   2718  1.1  mrg             default:
   2719  1.1  mrg                 goto Lerr;
   2720  1.1  mrg             }
   2721  1.1  mrg         }
   2722  1.1  mrg     Lerr:
   2723  1.1  mrg         if (filespec is null)
   2724  1.1  mrg             error(tok.loc, "invalid filename for `#line` directive");
   2725  1.1  mrg         else if (linemarker)
   2726  1.1  mrg             error(tok.loc, "invalid flag for line marker directive");
   2727  1.1  mrg         else if (!Ccompile)
   2728  1.1  mrg             error(tok.loc, "found `%s` when expecting new line following `#line` directive", tok.toChars());
   2729  1.1  mrg         if (tok.value != TOK.endOfLine)
   2730  1.1  mrg             skipToNextLine();
   2731  1.1  mrg     }
   2732  1.1  mrg 
   2733  1.1  mrg     /***************************************
   2734  1.1  mrg      * Scan forward to start of next line.
   2735  1.1  mrg      */
   2736  1.1  mrg     final void skipToNextLine()
   2737  1.1  mrg     {
   2738  1.1  mrg         while (1)
   2739  1.1  mrg         {
   2740  1.1  mrg             switch (*p)
   2741  1.1  mrg             {
   2742  1.1  mrg             case 0:
   2743  1.1  mrg             case 0x1A:
   2744  1.1  mrg                 return; // do not advance p
   2745  1.1  mrg 
   2746  1.1  mrg             case '\n':
   2747  1.1  mrg                 ++p;
   2748  1.1  mrg                 break;
   2749  1.1  mrg 
   2750  1.1  mrg             case '\r':
   2751  1.1  mrg                 ++p;
   2752  1.1  mrg                 if (p[0] == '\n')
   2753  1.1  mrg                    ++p;
   2754  1.1  mrg                 break;
   2755  1.1  mrg 
   2756  1.1  mrg             default:
   2757  1.1  mrg                 if (*p & 0x80)
   2758  1.1  mrg                 {
   2759  1.1  mrg                     const u = decodeUTF();
   2760  1.1  mrg                     if (u == PS || u == LS)
   2761  1.1  mrg                     {
   2762  1.1  mrg                         ++p;
   2763  1.1  mrg                         break;
   2764  1.1  mrg                     }
   2765  1.1  mrg                 }
   2766  1.1  mrg                 ++p;
   2767  1.1  mrg                 continue;
   2768  1.1  mrg             }
   2769  1.1  mrg             break;
   2770  1.1  mrg         }
   2771  1.1  mrg         endOfLine();
   2772  1.1  mrg         tokenizeNewlines = false;
   2773  1.1  mrg     }
   2774  1.1  mrg 
   2775  1.1  mrg     /********************************************
   2776  1.1  mrg      * Decode UTF character.
   2777  1.1  mrg      * Issue error messages for invalid sequences.
   2778  1.1  mrg      * Return decoded character, advance p to last character in UTF sequence.
   2779  1.1  mrg      */
   2780  1.1  mrg     private uint decodeUTF()
   2781  1.1  mrg     {
   2782  1.1  mrg         const s = p;
   2783  1.1  mrg         assert(*s & 0x80);
   2784  1.1  mrg         // Check length of remaining string up to 4 UTF-8 characters
   2785  1.1  mrg         size_t len;
   2786  1.1  mrg         for (len = 1; len < 4 && s[len]; len++)
   2787  1.1  mrg         {
   2788  1.1  mrg         }
   2789  1.1  mrg         size_t idx = 0;
   2790  1.1  mrg         dchar u;
   2791  1.1  mrg         const msg = utf_decodeChar(s[0 .. len], idx, u);
   2792  1.1  mrg         p += idx - 1;
   2793  1.1  mrg         if (msg)
   2794  1.1  mrg         {
   2795  1.1  mrg             error("%.*s", cast(int)msg.length, msg.ptr);
   2796  1.1  mrg         }
   2797  1.1  mrg         return u;
   2798  1.1  mrg     }
   2799  1.1  mrg 
   2800  1.1  mrg     /***************************************************
   2801  1.1  mrg      * Parse doc comment embedded between t.ptr and p.
   2802  1.1  mrg      * Remove trailing blanks and tabs from lines.
   2803  1.1  mrg      * Replace all newlines with \n.
   2804  1.1  mrg      * Remove leading comment character from each line.
   2805  1.1  mrg      * Decide if it's a lineComment or a blockComment.
   2806  1.1  mrg      * Append to previous one for this token.
   2807  1.1  mrg      *
   2808  1.1  mrg      * If newParagraph is true, an extra newline will be
   2809  1.1  mrg      * added between adjoining doc comments.
   2810  1.1  mrg      */
   2811  1.1  mrg     private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure
   2812  1.1  mrg     {
   2813  1.1  mrg         /* ct tells us which kind of comment it is: '/', '*', or '+'
   2814  1.1  mrg          */
   2815  1.1  mrg         const ct = t.ptr[2];
   2816  1.1  mrg         /* Start of comment text skips over / * *, / + +, or / / /
   2817  1.1  mrg          */
   2818  1.1  mrg         const(char)* q = t.ptr + 3; // start of comment text
   2819  1.1  mrg         const(char)* qend = p;
   2820  1.1  mrg         if (ct == '*' || ct == '+')
   2821  1.1  mrg             qend -= 2;
   2822  1.1  mrg         /* Scan over initial row of ****'s or ++++'s or ////'s
   2823  1.1  mrg          */
   2824  1.1  mrg         for (; q < qend; q++)
   2825  1.1  mrg         {
   2826  1.1  mrg             if (*q != ct)
   2827  1.1  mrg                 break;
   2828  1.1  mrg         }
   2829  1.1  mrg         /* Remove leading spaces until start of the comment
   2830  1.1  mrg          */
   2831  1.1  mrg         int linestart = 0;
   2832  1.1  mrg         if (ct == '/')
   2833  1.1  mrg         {
   2834  1.1  mrg             while (q < qend && (*q == ' ' || *q == '\t'))
   2835  1.1  mrg                 ++q;
   2836  1.1  mrg         }
   2837  1.1  mrg         else if (q < qend)
   2838  1.1  mrg         {
   2839  1.1  mrg             if (*q == '\r')
   2840  1.1  mrg             {
   2841  1.1  mrg                 ++q;
   2842  1.1  mrg                 if (q < qend && *q == '\n')
   2843  1.1  mrg                     ++q;
   2844  1.1  mrg                 linestart = 1;
   2845  1.1  mrg             }
   2846  1.1  mrg             else if (*q == '\n')
   2847  1.1  mrg             {
   2848  1.1  mrg                 ++q;
   2849  1.1  mrg                 linestart = 1;
   2850  1.1  mrg             }
   2851  1.1  mrg         }
   2852  1.1  mrg         /* Remove trailing row of ****'s or ++++'s
   2853  1.1  mrg          */
   2854  1.1  mrg         if (ct != '/')
   2855  1.1  mrg         {
   2856  1.1  mrg             for (; q < qend; qend--)
   2857  1.1  mrg             {
   2858  1.1  mrg                 if (qend[-1] != ct)
   2859  1.1  mrg                     break;
   2860  1.1  mrg             }
   2861  1.1  mrg         }
   2862  1.1  mrg         /* Comment is now [q .. qend].
   2863  1.1  mrg          * Canonicalize it into buf[].
   2864  1.1  mrg          */
   2865  1.1  mrg         OutBuffer buf;
   2866  1.1  mrg 
   2867  1.1  mrg         void trimTrailingWhitespace()
   2868  1.1  mrg         {
   2869  1.1  mrg             const s = buf[];
   2870  1.1  mrg             auto len = s.length;
   2871  1.1  mrg             while (len && (s[len - 1] == ' ' || s[len - 1] == '\t'))
   2872  1.1  mrg                 --len;
   2873  1.1  mrg             buf.setsize(len);
   2874  1.1  mrg         }
   2875  1.1  mrg 
   2876  1.1  mrg         for (; q < qend; q++)
   2877  1.1  mrg         {
   2878  1.1  mrg             char c = *q;
   2879  1.1  mrg             switch (c)
   2880  1.1  mrg             {
   2881  1.1  mrg             case '*':
   2882  1.1  mrg             case '+':
   2883  1.1  mrg                 if (linestart && c == ct)
   2884  1.1  mrg                 {
   2885  1.1  mrg                     linestart = 0;
   2886  1.1  mrg                     /* Trim preceding whitespace up to preceding \n
   2887  1.1  mrg                      */
   2888  1.1  mrg                     trimTrailingWhitespace();
   2889  1.1  mrg                     continue;
   2890  1.1  mrg                 }
   2891  1.1  mrg                 break;
   2892  1.1  mrg             case ' ':
   2893  1.1  mrg             case '\t':
   2894  1.1  mrg                 break;
   2895  1.1  mrg             case '\r':
   2896  1.1  mrg                 if (q[1] == '\n')
   2897  1.1  mrg                     continue; // skip the \r
   2898  1.1  mrg                 goto Lnewline;
   2899  1.1  mrg             default:
   2900  1.1  mrg                 if (c == 226)
   2901  1.1  mrg                 {
   2902  1.1  mrg                     // If LS or PS
   2903  1.1  mrg                     if (q[1] == 128 && (q[2] == 168 || q[2] == 169))
   2904  1.1  mrg                     {
   2905  1.1  mrg                         q += 2;
   2906  1.1  mrg                         goto Lnewline;
   2907  1.1  mrg                     }
   2908  1.1  mrg                 }
   2909  1.1  mrg                 linestart = 0;
   2910  1.1  mrg                 break;
   2911  1.1  mrg             Lnewline:
   2912  1.1  mrg                 c = '\n'; // replace all newlines with \n
   2913  1.1  mrg                 goto case;
   2914  1.1  mrg             case '\n':
   2915  1.1  mrg                 linestart = 1;
   2916  1.1  mrg                 /* Trim trailing whitespace
   2917  1.1  mrg                  */
   2918  1.1  mrg                 trimTrailingWhitespace();
   2919  1.1  mrg                 break;
   2920  1.1  mrg             }
   2921  1.1  mrg             buf.writeByte(c);
   2922  1.1  mrg         }
   2923  1.1  mrg         /* Trim trailing whitespace (if the last line does not have newline)
   2924  1.1  mrg          */
   2925  1.1  mrg         trimTrailingWhitespace();
   2926  1.1  mrg 
   2927  1.1  mrg         // Always end with a newline
   2928  1.1  mrg         const s = buf[];
   2929  1.1  mrg         if (s.length == 0 || s[$ - 1] != '\n')
   2930  1.1  mrg             buf.writeByte('\n');
   2931  1.1  mrg 
   2932  1.1  mrg         // It's a line comment if the start of the doc comment comes
   2933  1.1  mrg         // after other non-whitespace on the same line.
   2934  1.1  mrg         auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment;
   2935  1.1  mrg         // Combine with previous doc comment, if any
   2936  1.1  mrg         if (*dc)
   2937  1.1  mrg             *dc = combineComments(*dc, buf[], newParagraph).toDString();
   2938  1.1  mrg         else
   2939  1.1  mrg             *dc = buf.extractSlice(true);
   2940  1.1  mrg     }
   2941  1.1  mrg 
   2942  1.1  mrg     /********************************************
   2943  1.1  mrg      * Combine two document comments into one,
   2944  1.1  mrg      * separated by an extra newline if newParagraph is true.
   2945  1.1  mrg      */
   2946  1.1  mrg     static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure
   2947  1.1  mrg     {
   2948  1.1  mrg         //debug printf("Lexer::combineComments('%*.s', '%*.s', '%i')\n", cast(int) c1.length, c1.ptr, cast(int) c2.length, c2.ptr, newParagraph);
   2949  1.1  mrg         const(int) newParagraphSize = newParagraph ? 1 : 0; // Size of the combining '\n'
   2950  1.1  mrg         if (!c1)
   2951  1.1  mrg             return c2.ptr;
   2952  1.1  mrg         if (!c2)
   2953  1.1  mrg             return c1.ptr;
   2954  1.1  mrg 
   2955  1.1  mrg         int insertNewLine = 0;
   2956  1.1  mrg         if (c1.length && c1[$ - 1] != '\n')
   2957  1.1  mrg             insertNewLine = 1;
   2958  1.1  mrg         const retSize = c1.length + insertNewLine + newParagraphSize + c2.length;
   2959  1.1  mrg         auto p = cast(char*)mem.xmalloc_noscan(retSize + 1);
   2960  1.1  mrg         p[0 .. c1.length] = c1[];
   2961  1.1  mrg         if (insertNewLine)
   2962  1.1  mrg             p[c1.length] = '\n';
   2963  1.1  mrg         if (newParagraph)
   2964  1.1  mrg             p[c1.length + insertNewLine] = '\n';
   2965  1.1  mrg         p[retSize - c2.length .. retSize] = c2[];
   2966  1.1  mrg         p[retSize] = 0;
   2967  1.1  mrg         return p;
   2968  1.1  mrg     }
   2969  1.1  mrg 
   2970  1.1  mrg     /**************************
   2971  1.1  mrg      * `p` should be at start of next line
   2972  1.1  mrg      */
   2973  1.1  mrg     private void endOfLine() pure @nogc @safe
   2974  1.1  mrg     {
   2975  1.1  mrg         scanloc.linnum++;
   2976  1.1  mrg         line = p;
   2977  1.1  mrg     }
   2978  1.1  mrg }
   2979  1.1  mrg 
   2980  1.1  mrg 
   2981  1.1  mrg /******************************* Private *****************************************/
   2982  1.1  mrg 
   2983  1.1  mrg private:
   2984  1.1  mrg 
/// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__`
private struct TimeStampInfo
{
    /// Set by the first call to `initialize`; later calls return immediately
    private __gshared bool initdone = false;

    // Note: the buffers below must only be read after `initialize` has run.
    // The API isn't safe, and quite brittle, but it was left this way
    // over performance concerns.
    // This is currently only called once, from the lexer.
    __gshared char[11 + 1] date;        /// 6 + 1 + 4 characters of the `ctime` text, plus terminator
    __gshared char[8 + 1] time;         /// 8 characters of the `ctime` text, plus terminator
    __gshared char[24 + 1] timestamp;   /// first 24 characters of the `ctime` text, plus terminator

    /**
     * Fill `date`, `time` and `timestamp` on the first call.
     *
     * The time is taken from the environment variable `SOURCE_DATE_EPOCH`
     * when it is set, otherwise from the C `time` function. It is converted
     * to text with `ctime` and sliced at fixed offsets.
     * Params:
     *  loc = location used for the error reported when `SOURCE_DATE_EPOCH`
     *      cannot be parsed
     */
    public static void initialize(const ref Loc loc) nothrow
    {
        if (initdone)
            return;
        initdone = true;

        time_t now;
        // https://issues.dlang.org/show_bug.cgi?id=20444
        auto epoch = getenv("SOURCE_DATE_EPOCH");
        if (epoch is null)
        {
            .time(&now);
        }
        else if (!now.parseDigits(epoch.toDString()))
        {
            error(loc, "value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", epoch);
        }

        const stamp = ctime(&now);
        assert(stamp);
        sprintf(date.ptr, "%.6s %.4s", stamp + 4, stamp + 20);
        sprintf(time.ptr, "%.8s", stamp + 11);
        sprintf(timestamp.ptr, "%.24s", stamp);
    }
}
   3020  1.1  mrg 
/// Unicode line terminators that `skipToNextLine` compares decoded characters against
private enum
{
    LS = 0x2028, /// UTF line separator
    PS = 0x2029, /// UTF paragraph separator
}
   3023  1.1  mrg 
/********************************************
 * Character classification table, one entry of `CM*` flags per byte value.
 *
 * Built by a function literal that is called in place; the predicates
 * `isoctal`, `ishex`, `isidchar`, `isZeroSecond`, `isDigitSecond` and
 * `issinglechar` each test one flag of an entry.
 */
private static immutable cmtable = ()
{
    ubyte[256] table;
    foreach (const c; 0 .. table.length)
    {
        ubyte flags = 0;

        if (c >= '0' && c <= '7')
            flags |= CMoctal;
        if (c_isxdigit(c))
            flags |= CMhex;
        if (c == '_' || c_isalnum(c))
            flags |= CMidchar;

        // Flags describing which characters get CMzerosecond / CMdigitsecond
        switch (c)
        {
            case 'x': case 'X':
            case 'b': case 'B':
                flags |= CMzerosecond;
                break;

            case '0': .. case '9':
            case 'e': case 'E':
            case 'f': case 'F':
            case 'l': case 'L':
            case 'p': case 'P':
            case 'u': case 'U':
            case 'i':
            case '.':
            case '_':
                flags |= CMzerosecond | CMdigitsecond;
                break;

            default:
                break;
        }

        // CMsinglechar: every 7-bit character except backslash, line breaks,
        // the 0 and 0x1A bytes, and the single quote
        const excluded = c == '\\' || c == '\n' || c == '\r'
                      || c == 0 || c == 0x1A || c == '\'';
        if (!excluded && !(c & 0x80))
            flags |= CMsinglechar;

        table[c] = flags;
    }
    return table;
}();
   3079  1.1  mrg 
/// Bit flags stored in the entries of `cmtable`
private enum
{
    CMoctal       = 1 << 0, /// set for '0' .. '7'
    CMhex         = 1 << 1, /// set where `c_isxdigit` is true
    CMidchar      = 1 << 2, /// set where `c_isalnum` is true, and for '_'
    CMzerosecond  = 1 << 3, /// set for x X b B and for every CMdigitsecond character
    CMdigitsecond = 1 << 4, /// set for digits, e E f F l L p P u U i . and _
    CMsinglechar  = 1 << 5, /// set for 7-bit characters other than \ \n \r 0 0x1A and '
}
   3089  1.1  mrg 
/// Returns: true if the `CMoctal` flag is set for `c` in `cmtable`
private bool isoctal(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMoctal);
}
   3094  1.1  mrg 
/// Returns: true if the `CMhex` flag is set for `c` in `cmtable`
private bool ishex(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMhex);
}
   3099  1.1  mrg 
/// Returns: true if the `CMidchar` flag is set for `c` in `cmtable`
private bool isidchar(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMidchar);
}
   3104  1.1  mrg 
/// Returns: true if the `CMzerosecond` flag is set for `c` in `cmtable`
private bool isZeroSecond(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMzerosecond);
}
   3109  1.1  mrg 
/// Returns: true if the `CMdigitsecond` flag is set for `c` in `cmtable`
private bool isDigitSecond(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMdigitsecond);
}
   3114  1.1  mrg 
/// Returns: true if the `CMsinglechar` flag is set for `c` in `cmtable`
private bool issinglechar(const char c) pure @nogc @safe
{
    return cast(bool)(cmtable[c] & CMsinglechar);
}
   3119  1.1  mrg 
/**
 * Locale-independent test for an ASCII hexadecimal digit.
 * Params:
 *  c = character code to classify
 * Returns: true for '0'..'9', 'a'..'f' and 'A'..'F'
 */
private bool c_isxdigit(const int c) pure @nogc @safe
{
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'a' && c <= 'f')
        return true;
    return c >= 'A' && c <= 'F';
}
   3126  1.1  mrg 
/**
 * Locale-independent test for an ASCII letter or digit.
 * Params:
 *  c = character code to classify
 * Returns: true for '0'..'9', 'a'..'z' and 'A'..'Z'
 */
private bool c_isalnum(const int c) pure @nogc @safe
{
    if (c >= '0' && c <= '9')
        return true;
    if (c >= 'a' && c <= 'z')
        return true;
    return c >= 'A' && c <= 'Z';
}
   3133  1.1  mrg 
   3134  1.1  mrg /******************************* Unittest *****************************************/
   3135  1.1  mrg 
// Valid escape sequences: `Lexer.escapeSequence` must return the expected
// character, consume the whole input, and emit no diagnostic.
unittest
{
    import dmd.console;

    // Any diagnostic issued while this handler is installed fails the test.
    nothrow bool assertDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                   const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(0);
    }
    diagnosticHandler = &assertDiagnosticHandler;

    // `sequence` is the text following the backslash
    static void test(T)(string sequence, T expected, bool Ccompile = false)
    {
        const(char)* cursor = sequence.ptr;
        const actual = Lexer.escapeSequence(Loc.initial, cursor, Ccompile);
        assert(expected == actual);
        assert(cursor == sequence.ptr + sequence.length);
    }

    // simple single-character escapes
    test(`'`, '\'');
    test(`"`, '"');
    test(`?`, '?');
    test(`\`, '\\');
    test(`0`, '\0');
    test(`a`, '\a');
    test(`b`, '\b');
    test(`f`, '\f');
    test(`n`, '\n');
    test(`r`, '\r');
    test(`t`, '\t');
    test(`v`, '\v');

    // two-digit hex escapes
    test(`x00`, 0x00);
    test(`xff`, 0xff);
    test(`xFF`, 0xff);
    test(`xa7`, 0xa7);
    test(`x3c`, 0x3c);
    test(`xe2`, 0xe2);

    // octal escapes
    test(`1`, '\1');
    test(`42`, '\42');
    test(`357`, '\357');

    // four- and eight-digit Unicode escapes
    test(`u1234`, '\u1234');
    test(`uf0e4`, '\uf0e4');

    test(`U0001f603`, '\U0001f603');

    // named character entities
    test(`&quot;`, '"');
    test(`&lt;`, '<');
    test(`&gt;`, '>');

    diagnosticHandler = null;
}
   3188  1.1  mrg 
// Invalid escape sequences: `Lexer.escapeSequence` must report exactly the
// expected error, return the expected value and consume the expected
// number of characters.
unittest
{
    import dmd.console;
    string expected;    // error text the handler must receive
    bool gotError;      // set by the handler when it is called

    nothrow bool expectDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header,
                                         const(char)* format, va_list ap, const(char)* p1, const(char)* p2)
    {
        assert(cast(Classification)headerColor == Classification.error);

        gotError = true;
        char[100] buffer = void;
        // Format with an explicit bound so that an unexpectedly long message
        // fails the check below instead of overrunning the stack buffer.
        const written = vsnprintf(buffer.ptr, buffer.length, format, ap);
        assert(written >= 0 && cast(size_t) written < buffer.length);
        auto actual = buffer[0 .. written];
        assert(expected == actual);
        return true;
    }

    diagnosticHandler = &expectDiagnosticHandler;

    // `sequence` is the text following the backslash. `global.errors` is
    // saved and restored so the provoked errors do not leak out of the test.
    void test(string sequence, string expectedError, dchar expectedReturnValue, uint expectedScanLength, bool Ccompile = false)
    {
        uint errors = global.errors;
        gotError = false;
        expected = expectedError;
        auto p = cast(const(char)*)sequence.ptr;
        auto actualReturnValue = Lexer.escapeSequence(Loc.initial, p, Ccompile);
        assert(gotError);
        assert(expectedReturnValue == actualReturnValue);

        auto actualScanLength = p - sequence.ptr;
        assert(expectedScanLength == actualScanLength);
        global.errors = errors;
    }

    test("c", `undefined escape sequence \c`, 'c', 1);
    test("!", `undefined escape sequence \!`, '!', 1);
    test("&quot;", `undefined escape sequence \&`, '&', 1, true);

    test("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2);

    test("u1"  , `escape hex sequence has 1 hex digits instead of 4`,   0x1, 2);
    test("u12" , `escape hex sequence has 2 hex digits instead of 4`,  0x12, 3);
    test("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4);

    test("U0"      , `escape hex sequence has 1 hex digits instead of 8`,       0x0, 2);
    test("U00"     , `escape hex sequence has 2 hex digits instead of 8`,      0x00, 3);
    test("U000"    , `escape hex sequence has 3 hex digits instead of 8`,     0x000, 4);
    test("U0000"   , `escape hex sequence has 4 hex digits instead of 8`,    0x0000, 5);
    test("U0001f"  , `escape hex sequence has 5 hex digits instead of 8`,   0x0001f, 6);
    test("U0001f6" , `escape hex sequence has 6 hex digits instead of 8`,  0x0001f6, 7);
    test("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8);

    test("ud800"    , `invalid UTF character \U0000d800`, '?', 5);
    test("udfff"    , `invalid UTF character \U0000dfff`, '?', 5);
    test("U00110000", `invalid UTF character \U00110000`, '?', 9);

    test("xg0"      , `undefined escape hex sequence \xg`, 'g', 2);
    test("ug000"    , `undefined escape hex sequence \ug`, 'g', 2);
    test("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2);

    test("&BAD;", `unnamed character entity &BAD;`  , '?', 5);
    test("&quot", `unterminated named entity &quot;`, '?', 5);

    test("400", `escape octal sequence \400 is larger than \377`, 0x100, 3);

    diagnosticHandler = null;
}
   3258  1.1  mrg 
// Smoke test: "int" lexes as TOK.int32, and every further request keeps
// returning TOK.endOfFile.
unittest
{
    string text = "int"; // We rely on the implicit null-terminator
    scope Lexer lex1 = new Lexer(null, text.ptr, 0, text.length, 0, 0);

    assert(lex1.nextToken() == TOK.int32);

    // End of file is sticky
    foreach (_; 0 .. 3)
        assert(lex1.nextToken() == TOK.endOfFile);
}
   3277  1.1  mrg 
// Malformed input: the lexer must still arrive at TOK.endOfFile, within a
// number of `nextToken` calls bounded by the length of the input, and stay there.
unittest
{
    // We don't want to see Lexer error output during these tests.
    uint errors = global.startGagging();
    scope(exit) global.endGagging(errors);

    static immutable char[][] testcases =
    [   // Testcase must end with 0 or 0x1A.
        [0], // not malformed, but pathological
        ['\'', 0],
        ['\'', 0x1A],
        ['{', '{', 'q', '{', 0],
        [0xFF, 0],
        [0xFF, 0x80, 0],
        [0xFF, 0xFF, 0],
        [0xFF, 0xFF, 0],
        ['x', '"', 0x1A],
    ];

    foreach (testcase; testcases)
    {
        scope Lexer lex2 = new Lexer(null, testcase.ptr, 0, testcase.length-1, 0, 0);

        // `calls` counts the nextToken calls made so far; stop at end of file
        // or once as many calls as there are input bytes have been made.
        TOK tok = lex2.nextToken();
        for (size_t calls = 1; tok != TOK.endOfFile && calls < testcase.length; ++calls)
            tok = lex2.nextToken();
        assert(tok == TOK.endOfFile);

        // End of file must be sticky
        tok = lex2.nextToken();
        assert(tok == TOK.endOfFile);
    }
}
   3312