1 1.1 mrg /** 2 1.1 mrg * Implements the lexical analyzer, which converts source code into lexical tokens. 3 1.1 mrg * 4 1.1 mrg * Specification: $(LINK2 https://dlang.org/spec/lex.html, Lexical) 5 1.1 mrg * 6 1.1 mrg * Copyright: Copyright (C) 1999-2022 by The D Language Foundation, All Rights Reserved 7 1.1 mrg * Authors: $(LINK2 https://www.digitalmars.com, Walter Bright) 8 1.1 mrg * License: $(LINK2 https://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) 9 1.1 mrg * Source: $(LINK2 https://github.com/dlang/dmd/blob/master/src/dmd/lexer.d, _lexer.d) 10 1.1 mrg * Documentation: https://dlang.org/phobos/dmd_lexer.html 11 1.1 mrg * Coverage: https://codecov.io/gh/dlang/dmd/src/master/src/dmd/lexer.d 12 1.1 mrg */ 13 1.1 mrg 14 1.1 mrg module dmd.lexer; 15 1.1 mrg 16 1.1 mrg import core.stdc.ctype; 17 1.1 mrg import core.stdc.errno; 18 1.1 mrg import core.stdc.stdarg; 19 1.1 mrg import core.stdc.stdio; 20 1.1 mrg import core.stdc.stdlib : getenv; 21 1.1 mrg import core.stdc.string; 22 1.1 mrg import core.stdc.time; 23 1.1 mrg 24 1.1 mrg import dmd.entity; 25 1.1 mrg import dmd.errors; 26 1.1 mrg import dmd.globals; 27 1.1 mrg import dmd.id; 28 1.1 mrg import dmd.identifier; 29 1.1 mrg import dmd.root.array; 30 1.1 mrg import dmd.root.ctfloat; 31 1.1 mrg import dmd.common.outbuffer; 32 1.1 mrg import dmd.root.port; 33 1.1 mrg import dmd.root.rmem; 34 1.1 mrg import dmd.root.string; 35 1.1 mrg import dmd.root.utf; 36 1.1 mrg import dmd.tokens; 37 1.1 mrg import dmd.utils; 38 1.1 mrg 39 1.1 mrg nothrow: 40 1.1 mrg 41 1.1 mrg version (DMDLIB) 42 1.1 mrg { 43 1.1 mrg version = LocOffset; 44 1.1 mrg } 45 1.1 mrg 46 1.1 mrg /*********************************************************** 47 1.1 mrg */ 48 1.1 mrg class Lexer 49 1.1 mrg { 50 1.1 mrg private __gshared OutBuffer stringbuffer; 51 1.1 mrg 52 1.1 mrg Loc scanloc; // for error messages 53 1.1 mrg Loc prevloc; // location of token before current 54 1.1 mrg 55 1.1 mrg const(char)* p; // current character 56 1.1 mrg 57 
1.1 mrg Token token; 58 1.1 mrg 59 1.1 mrg // For ImportC 60 1.1 mrg bool Ccompile; /// true if compiling ImportC 61 1.1 mrg 62 1.1 mrg // The following are valid only if (Ccompile == true) 63 1.1 mrg ubyte boolsize; /// size of a C _Bool, default 1 64 1.1 mrg ubyte shortsize; /// size of a C short, default 2 65 1.1 mrg ubyte intsize; /// size of a C int, default 4 66 1.1 mrg ubyte longsize; /// size of C long, 4 or 8 67 1.1 mrg ubyte long_longsize; /// size of a C long long, default 8 68 1.1 mrg ubyte long_doublesize; /// size of C long double, 8 or D real.sizeof 69 1.1 mrg ubyte wchar_tsize; /// size of C wchar_t, 2 or 4 70 1.1 mrg 71 1.1 mrg private 72 1.1 mrg { 73 1.1 mrg const(char)* base; // pointer to start of buffer 74 1.1 mrg const(char)* end; // pointer to last element of buffer 75 1.1 mrg const(char)* line; // start of current line 76 1.1 mrg 77 1.1 mrg bool doDocComment; // collect doc comment information 78 1.1 mrg bool anyToken; // seen at least one token 79 1.1 mrg bool commentToken; // comments are TOK.comment's 80 1.1 mrg bool tokenizeNewlines; // newlines are turned into TOK.endOfLine's 81 1.1 mrg 82 1.1 mrg version (DMDLIB) 83 1.1 mrg { 84 1.1 mrg bool whitespaceToken; // tokenize whitespaces 85 1.1 mrg } 86 1.1 mrg 87 1.1 mrg int inTokenStringConstant; // can be larger than 1 when in nested q{} strings 88 1.1 mrg int lastDocLine; // last line of previous doc comment 89 1.1 mrg 90 1.1 mrg Token* tokenFreelist; 91 1.1 mrg } 92 1.1 mrg 93 1.1 mrg nothrow: 94 1.1 mrg 95 1.1 mrg /********************* 96 1.1 mrg * Creates a Lexer for the source code base[begoffset..endoffset+1]. 97 1.1 mrg * The last character, base[endoffset], must be null (0) or EOF (0x1A). 
*
     * Params:
     *  filename = used for error messages
     *  base = source code, must be terminated by a null (0) or EOF (0x1A) character
     *  begoffset = starting offset into base[]
     *  endoffset = the last offset to read into base[]
     *  doDocComment = handle documentation comments
     *  commentToken = comments become TOK.comment's
     */
    this(const(char)* filename, const(char)* base, size_t begoffset,
        size_t endoffset, bool doDocComment, bool commentToken) pure
    {
        // Buffer bounds and the initial scanning position.
        this.base = base;
        this.end = base + endoffset;
        this.p = base + begoffset;
        this.line = this.p;

        // Location / token state.
        this.scanloc = Loc(filename, 1, 1);
        this.token = Token.init;

        // Lexer options.
        this.doDocComment = doDocComment;
        this.commentToken = commentToken;
        this.tokenizeNewlines = false;
        this.inTokenStringConstant = 0;
        this.lastDocLine = 0;

        // A leading "#!" line is ignored: advance just past its newline, or
        // stop on the 0 / 0x1A terminator without consuming it.
        if (p && p[0] == '#' && p[1] == '!')
        {
            p += 2;
            for (;; ++p)
            {
                const ch = *p;
                if (ch == 0 || ch == 0x1A)
                    break;
                if (ch == '\n')
                {
                    ++p;
                    break;
                }
            }
            endOfLine();
        }
    }

    version (DMDLIB)
    {
        /**
         * Same as the primary constructor, and additionally records whether
         * whitespace should be returned as tokens.
         */
        this(const(char)* filename, const(char)* base, size_t begoffset, size_t endoffset,
            bool doDocComment, bool commentToken, bool whitespaceToken)
        {
            this(filename, base, begoffset, endoffset, doDocComment, commentToken);
            this.whitespaceToken = whitespaceToken;
        }

        /// Range primitive: true once the current token is `TOK.endOfFile`.
        bool empty() const pure @property @nogc @safe
        {
            return token.value == TOK.endOfFile;
        }

        /// Range primitive: the kind of the current token.
        TOK front() const pure @property @nogc @safe
        {
            return token.value;
        }

        /// Range primitive: advance to the next token.
        void popFront()
        {
            nextToken();
        }
    }

    /// Returns: a `Token`, taken from the freelist when one is available, otherwise newly allocated.
    Token* allocateToken() pure nothrow @safe
    {
        if (tokenFreelist is null)
            return new Token();

        // Pop the head of the freelist and detach it from the chain.
        Token* recycled = tokenFreelist;
        tokenFreelist = recycled.next;
        recycled.next = null;
        return recycled;
    }

    /// Puts the given token back at the head of the freelist.
    private void releaseToken(Token* token) pure nothrow @nogc @safe
    {
        // With the GC enabled the token is reset first (presumably so a pooled
        // token does not keep referenced data alive -- confirm against rmem).
        if (mem.isGCEnabled)
        {
            *token = Token.init;
        }
        token.next = tokenFreelist;
        tokenFreelist = token;
    }

    /**
     * Advance to the next token: use an already peeked token if one is queued
     * behind the current one, otherwise scan a fresh token from the buffer.
     * Returns: the kind of the new current token
     */
    final TOK nextToken()
    {
        prevloc = token.loc;
        if (Token* queued = token.next)
        {
            // Copy the look-ahead token over the current one, then recycle it.
            memcpy(&token, queued, Token.sizeof);
            releaseToken(queued);
        }
        else
            scan(&token);
        return token.value;
    }

    /***********************
     * Look ahead at next token's value.
     */
    final TOK peekNext()
    {
        Token* ahead = peek(&token);
        return ahead.value;
    }

    /***********************
     * Look 2 tokens ahead at value.
*/
    final TOK peekNext2()
    {
        Token* t = peek(&token);
        return peek(t).value;
    }

    /****************************
     * Turn next token in buffer into a token.
     *
     * Whitespace and comments are skipped unless the lexer is configured to
     * return them (`commentToken`, `tokenizeNewlines`, and `whitespaceToken`
     * under `version (DMDLIB)`). On the 0 / 0x1A terminator `TOK.endOfFile`
     * is produced without advancing `p`, so repeated calls keep returning it.
     *
     * Params:
     *  t = the token to set the resulting Token to
     */
    final void scan(Token* t)
    {
        const lastLine = scanloc.linnum;
        Loc startLoc;
        t.blockComment = null;
        t.lineComment = null;

        while (1)
        {
            t.ptr = p;
            //printf("p = %p, *p = '%c'\n",p,*p);
            t.loc = loc();
            switch (*p)
            {
            case 0:
            case 0x1A:
                t.value = TOK.endOfFile; // end of file
                // Intentionally not advancing `p`, such that subsequent calls keep returning TOK.endOfFile.
                return;
            case ' ':
                // Skip 4 spaces at a time after aligning 'p' to a 4-byte boundary.
                while ((cast(size_t)p) % uint.sizeof)
                {
                    if (*p != ' ')
                        goto LendSkipFourSpaces;
                    p++;
                }
                while (*(cast(uint*)p) == 0x20202020) // ' ' == 0x20
                    p += 4;
                // Skip over any remaining space on the line.
                while (*p == ' ')
                    p++;
            LendSkipFourSpaces:
                version (DMDLIB)
                {
                    if (whitespaceToken)
                    {
                        t.value = TOK.whitespace;
                        return;
                    }
                }
                continue; // skip white space
            case '\t':
            case '\v':
            case '\f':
                p++;
                version (DMDLIB)
                {
                    if (whitespaceToken)
                    {
                        t.value = TOK.whitespace;
                        return;
                    }
                }
                continue; // skip white space
            case '\r':
                p++;
                if (*p != '\n') // if CR stands by itself
                {
                    endOfLine();
                    if (tokenizeNewlines)
                    {
                        t.value = TOK.endOfLine;
                        tokenizeNewlines = false;
                        return;
                    }
                }
                version (DMDLIB)
                {
                    if (whitespaceToken)
                    {
                        t.value = TOK.whitespace;
                        return;
                    }
                }
                continue; // skip white space
            case '\n':
                p++;
                endOfLine();
                if (tokenizeNewlines)
                {
                    t.value = TOK.endOfLine;
                    tokenizeNewlines = false;
                    return;
                }
                version (DMDLIB)
                {
                    if (whitespaceToken)
                    {
                        t.value = TOK.whitespace;
                        return;
                    }
                }
                continue; // skip white space
            case '0':
                if (!isZeroSecond(p[1])) // if numeric literal does not continue
                {
                    ++p;
                    t.unsvalue = 0;
                    t.value = TOK.int32Literal;
                    return;
                }
                goto Lnumber;

            case '1': .. case '9':
                if (!isDigitSecond(p[1])) // if numeric literal does not continue
                {
                    t.unsvalue = *p - '0';
                    ++p;
                    t.value = TOK.int32Literal;
                    return;
                }
            Lnumber:
                t.value = number(t);
                return;

            case '\'':
                if (issinglechar(p[1]) && p[2] == '\'')
                {
                    t.unsvalue = p[1]; // simple one character literal
                    t.value = TOK.charLiteral;
                    p += 3;
                }
                else if (Ccompile)
                {
                    clexerCharConstant(*t, 0);
                }
                else
                {
                    t.value = charConstant(t);
                }
                return;

            case 'u':
            case 'U':
            case 'L':
                if (!Ccompile)
                    goto case_ident;
                if (p[1] == '\'')       // C wide character constant
                {
                    char c = *p;
                    // Convert L to u or U according to the target's wchar_t:
                    // a 2 byte wchar_t is the 16 bit form (u), anything else the
                    // 32 bit form (U). This follows the C11 prefixes and agrees
                    // with the wide string literal branch below.
                    if (c == 'L')
                        c = (wchar_tsize == 2) ? 'u' : 'U';
                    ++p;
                    clexerCharConstant(*t, c);
                    return;
                }
                else if (p[1] == '\"')  // C wide string literal
                {
                    const c = *p;
                    ++p;
                    escapeStringConstant(t);
                    t.postfix = c == 'L' ? (wchar_tsize == 2 ? 'w' : 'd') :
                                c == 'u' ? 'w' :
                                'd';
                    return;
                }
                else if (p[1] == '8' && p[2] == '\"') // C UTF-8 string literal
                {
                    p += 2;
                    escapeStringConstant(t);
                    return;
                }
                goto case_ident;

            case 'r':
                if (Ccompile || p[1] != '"')
                    goto case_ident;
                p++;
                goto case '`';
            case '`':
                if (Ccompile)
                    goto default;
                wysiwygStringConstant(t);
                return;
            case 'q':
                if (Ccompile)
                    goto case_ident;
                if (p[1] == '"')
                {
                    p++;
                    delimitedStringConstant(t);
                    return;
                }
                else if (p[1] == '{')
                {
                    p++;
                    tokenStringConstant(t);
                    return;
                }
                else
                    goto case_ident;
            case '"':
                escapeStringConstant(t);
                return;
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'g':
            case 'h':
            case 'i':
            case 'j':
            case 'k':
            case 'l':
            case 'm':
            case 'n':
            case 'o':
            case 'p':
                /*case 'q': case 'r':*/
            case 's':
            case 't':
            //case 'u':
            case 'v':
            case 'w':
            case 'x':
            case 'y':
            case 'z':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
            case 'G':
            case 'H':
            case 'I':
            case 'J':
            case 'K':
            //case 'L':
            case 'M':
            case 'N':
            case 'O':
            case 'P':
            case 'Q':
            case 'R':
            case 'S':
            case 'T':
            //case 'U':
            case 'V':
            case 'W':
            case 'X':
            case 'Y':
            case 'Z':
            case '_':
            case_ident:
                {
                    // Consume the remaining identifier characters, ASCII or Unicode alpha.
                    while (1)
                    {
                        const c = *++p;
                        if (isidchar(c))
                            continue;
                        else if (c & 0x80)
                        {
                            const s = p;
                            const u = decodeUTF();
                            if (isUniAlpha(u))
                                continue;
                            error("char 0x%04x not allowed in identifier", u);
                            p = s; // end the identifier before the offending character
                        }
                        break;
                    }
                    Identifier id = Identifier.idPool(cast(char*)t.ptr, cast(uint)(p - t.ptr));
                    t.ident = id;
                    t.value = cast(TOK)id.getValue();

                    anyToken = true;

                    /* Different keywords for C and D
                     */
                    if (Ccompile)
                    {
                        if (t.value != TOK.identifier)
                        {
                            t.value = Ckeywords[t.value];  // filter out D keywords
                        }
                    }
                    else if (t.value >= FirstCKeyword)
                        t.value = TOK.identifier;       // filter out C keywords

                    else if (*t.ptr == '_') // if special identifier token
                    {
                        // Lazy initialization
                        TimeStampInfo.initialize(t.loc);

                        if (id == Id.DATE)
                        {
                            t.ustring = TimeStampInfo.date.ptr;
                            goto Lstr;
                        }
                        else if (id == Id.TIME)
                        {
                            t.ustring = TimeStampInfo.time.ptr;
                            goto Lstr;
                        }
                        else if (id == Id.VENDOR)
                        {
                            t.ustring = global.vendor.xarraydup.ptr;
                            goto Lstr;
                        }
                        else if (id == Id.TIMESTAMP)
                        {
                            t.ustring = TimeStampInfo.timestamp.ptr;
                        Lstr:
                            // Shared tail: the special identifier becomes a string literal token.
                            t.value = TOK.string_;
                            t.postfix = 0;
                            t.len = cast(uint)strlen(t.ustring);
                        }
                        else if (id == Id.VERSIONX)
                        {
                            t.value = TOK.int64Literal;
                            t.unsvalue = global.versionNumber();
                        }
                        else if (id == Id.EOFX)
                        {
                            t.value = TOK.endOfFile;
                            // Advance scanner to end of file
                            while (!(*p == 0 || *p == 0x1A))
                                p++;
                        }
                    }
                    //printf("t.value = %d\n",t.value);
                    return;
                }
            case '/':
                p++;
                switch (*p)
                {
                case '=':
                    p++;
                    t.value = TOK.divAssign;
                    return;
                case '*':
                    p++;
                    startLoc = loc();
                    while (1)
                    {
                        // Inner loop: advance to the next '/' while tracking line ends.
                        while (1)
                        {
                            const c = *p;
                            switch (c)
                            {
                            case '/':
                                break;
                            case '\n':
                                endOfLine();
                                p++;
                                continue;
                            case '\r':
                                p++;
                                if (*p != '\n')
                                    endOfLine();
                                continue;
                            case 0:
                            case 0x1A:
                                error("unterminated /* */ comment");
                                p = end;
                                t.loc = loc();
                                t.value = TOK.endOfFile;
                                return;
                            default:
                                if (c & 0x80)
                                {
                                    const u = decodeUTF();
                                    if (u == PS || u == LS)
                                        endOfLine();
                                }
                                p++;
                                continue;
                            }
                            break;
                        }
                        p++;
                        // The '/' closes the comment only when preceded by '*',
                        // and that '*' must not be the one that opened the comment.
                        if (p[-2] == '*' && p - 3 != t.ptr)
                            break;
                    }
                    if (commentToken)
                    {
                        t.loc = startLoc;
                        t.value = TOK.comment;
                        return;
                    }
                    else if (doDocComment && t.ptr[2] == '*' && p - 4 != t.ptr)
                    {
                        // if /** but not /**/
                        getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                        lastDocLine = scanloc.linnum;
                    }
                    continue;
                case '/': // do // style comments
                    startLoc = loc();
                    while (1)
                    {
                        const c = *++p;
                        switch (c)
                        {
                        case '\n':
                            break;
                        case '\r':
                            if (p[1] == '\n')
                                p++;
                            break;
                        case 0:
                        case 0x1A:
                            if (commentToken)
                            {
                                p = end;
                                t.loc = startLoc;
                                t.value = TOK.comment;
                                return;
                            }
                            if (doDocComment && t.ptr[2] == '/')
                            {
                                getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                                lastDocLine = scanloc.linnum;
                            }
                            p = end;
                            t.loc = loc();
                            t.value = TOK.endOfFile;
                            return;
                        default:
                            if (c & 0x80)
                            {
                                const u = decodeUTF();
                                if (u == PS || u == LS)
                                    break;
                            }
                            continue;
                        }
                        break;
                    }
                    if (commentToken)
                    {
                        version (DMDLIB) {}
                        else
                        {
                            p++;
                            endOfLine();
                        }
                        t.loc = startLoc;
                        t.value = TOK.comment;
                        return;
                    }
                    if (doDocComment && t.ptr[2] == '/')
                    {
                        getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                        lastDocLine = scanloc.linnum;
                    }
                    p++;
                    endOfLine();
                    continue;
                case '+':
                    if (!Ccompile)
                    {
                        int nest;
                        startLoc = loc();
                        p++;
                        nest = 1;
                        while (1)
                        {
                            char c = *p;
                            switch (c)
                            {
                            case '/':
                                p++;
                                if (*p == '+')
                                {
                                    p++;
                                    nest++;
                                }
                                continue;
                            case '+':
                                p++;
                                if (*p == '/')
                                {
                                    p++;
                                    if (--nest == 0)
                                        break;
                                }
                                continue;
                            case '\r':
                                p++;
                                if (*p != '\n')
                                    endOfLine();
                                continue;
                            case '\n':
                                endOfLine();
                                p++;
                                continue;
                            case 0:
                            case 0x1A:
                                error("unterminated /+ +/ comment");
                                p = end;
                                t.loc = loc();
                                t.value = TOK.endOfFile;
                                return;
                            default:
                                if (c & 0x80)
                                {
                                    uint u = decodeUTF();
                                    if (u == PS || u == LS)
                                        endOfLine();
                                }
                                p++;
                                continue;
                            }
                            break;
                        }
                        if (commentToken)
                        {
                            t.loc = startLoc;
                            t.value = TOK.comment;
                            return;
                        }
                        if (doDocComment && t.ptr[2] == '+' && p - 4 != t.ptr)
                        {
                            // if /++ but not /++/
                            getDocComment(t, lastLine == startLoc.linnum, startLoc.linnum - lastDocLine > 1);
                            lastDocLine = scanloc.linnum;
                        }
                        continue;
                    }
                    break;
                default:
                    break;
                }
                t.value = TOK.div;
                return;
            case '.':
                p++;
                if (isdigit(*p))
                {
                    /* Note that we don't allow ._1 and ._ as being
                     * valid floating point numbers.
                     */
                    p--;
                    t.value = inreal(t);
                }
                else if (p[0] == '.')
                {
                    if (p[1] == '.')
                    {
                        p += 2;
                        t.value = TOK.dotDotDot;
                    }
                    else
                    {
                        p++;
                        t.value = TOK.slice;
                    }
                }
                else
                    t.value = TOK.dot;
                return;
            case '&':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.andAssign;
                }
                else if (*p == '&')
                {
                    p++;
                    t.value = TOK.andAnd;
                }
                else
                    t.value = TOK.and;
                return;
            case '|':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.orAssign;
                }
                else if (*p == '|')
                {
                    p++;
                    t.value = TOK.orOr;
                }
                else
                    t.value = TOK.or;
                return;
            case '-':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.minAssign;
                }
                else if (*p == '-')
                {
                    p++;
                    t.value = TOK.minusMinus;
                }
                else if (*p == '>')
                {
                    ++p;
                    t.value = TOK.arrow;
                }
                else
                    t.value = TOK.min;
                return;
            case '+':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.addAssign;
                }
                else if (*p == '+')
                {
                    p++;
                    t.value = TOK.plusPlus;
                }
                else
                    t.value = TOK.add;
                return;
            case '<':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.lessOrEqual; // <=
                }
                else if (*p == '<')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.leftShiftAssign; // <<=
                    }
                    else
                        t.value = TOK.leftShift; // <<
                }
                else if (*p == ':' && Ccompile)
                {
                    ++p;
                    t.value = TOK.leftBracket;  // <:
                }
                else if (*p == '%' && Ccompile)
                {
                    ++p;
                    t.value = TOK.leftCurly;    // <%
                }
                else
                    t.value = TOK.lessThan; // <
                return;
            case '>':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.greaterOrEqual; // >=
                }
                else if (*p == '>')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.rightShiftAssign; // >>=
                    }
                    else if (*p == '>')
                    {
                        p++;
                        if (*p == '=')
                        {
                            p++;
                            t.value = TOK.unsignedRightShiftAssign; // >>>=
                        }
                        else
                            t.value = TOK.unsignedRightShift; // >>>
                    }
                    else
                        t.value = TOK.rightShift; // >>
                }
                else
                    t.value = TOK.greaterThan; // >
                return;
            case '!':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.notEqual; // !=
                }
                else
                    t.value = TOK.not; // !
                return;
            case '=':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.equal; // ==
                }
                else if (*p == '>')
                {
                    p++;
                    t.value = TOK.goesTo; // =>
                }
                else
                    t.value = TOK.assign; // =
                return;
            case '~':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.concatenateAssign; // ~=
                }
                else
                    t.value = TOK.tilde; // ~
                return;
            case '^':
                p++;
                if (*p == '^')
                {
                    p++;
                    if (*p == '=')
                    {
                        p++;
                        t.value = TOK.powAssign; // ^^=
                    }
                    else
                        t.value = TOK.pow; // ^^
                }
                else if (*p == '=')
                {
                    p++;
                    t.value = TOK.xorAssign; // ^=
                }
                else
                    t.value = TOK.xor; // ^
                return;
            case '(':
                p++;
                t.value = TOK.leftParenthesis;
                return;
            case ')':
                p++;
                t.value = TOK.rightParenthesis;
                return;
            case '[':
                p++;
                t.value = TOK.leftBracket;
                return;
            case ']':
                p++;
                t.value = TOK.rightBracket;
                return;
            case '{':
                p++;
                t.value = TOK.leftCurly;
                return;
            case '}':
                p++;
                t.value = TOK.rightCurly;
                return;
            case '?':
                p++;
                t.value = TOK.question;
                return;
            case ',':
                p++;
                t.value = TOK.comma;
                return;
            case ';':
                p++;
                t.value = TOK.semicolon;
                return;
            case ':':
                p++;
                if (*p == ':')
                {
                    ++p;
                    t.value = TOK.colonColon;
                }
                else if (*p == '>' && Ccompile)
                {
                    ++p;
                    t.value = TOK.rightBracket;
                }
                else
                    t.value = TOK.colon;
                return;
            case '$':
                p++;
                t.value = TOK.dollar;
                return;
            case '@':
                p++;
                t.value = TOK.at;
                return;
            case '*':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.mulAssign;
                }
                else
                    t.value = TOK.mul;
                return;
            case '%':
                p++;
                if (*p == '=')
                {
                    p++;
                    t.value = TOK.modAssign;
                }
                else if (*p == '>' && Ccompile)
                {
                    ++p;
                    t.value = TOK.rightCurly;
                }
                else if (*p == ':' && Ccompile)
                {
                    goto case '#';      // %: means #
                }
                else
                    t.value = TOK.mod;
                return;
            case '#':
                {
                    // https://issues.dlang.org/show_bug.cgi?id=22825
                    // Special token sequences are terminated by newlines,
                    // and should not be skipped over.
                    this.tokenizeNewlines = true;
                    p++;
                    if (parseSpecialTokenSequence())
                        continue;
                    t.value = TOK.pound;
                    return;
                }
            default:
                {
                    dchar c = *p;
                    if (c & 0x80)
                    {
                        c = decodeUTF();
                        // Check for start of unicode identifier
                        if (isUniAlpha(c))
                            goto case_ident;
                        if (c == PS || c == LS)
                        {
                            endOfLine();
                            p++;
                            if (tokenizeNewlines)
                            {
                                t.value = TOK.endOfLine;
                                tokenizeNewlines = false;
                                return;
                            }
                            continue;
                        }
                    }
                    if (c < 0x80 && isprint(c))
                        error("character '%c' is not a valid token", c);
                    else
                        error("character 0x%02x is not a valid token", c);
                    p++;
                    continue;
                }
            }
        }
    }

    /**
     * Return the token following `ct`, scanning it from the buffer and
     * linking it behind `ct` if it has not been peeked before.
     * Params:
     *  ct = token to look past
     * Returns: the token after `ct`
     */
    final Token* peek(Token* ct)
    {
        Token* t;
        if (ct.next)
            t = ct.next;
        else
        {
            t = allocateToken();
            scan(t);
            ct.next = t;
        }
        return t;
    }

    /*********************************
     * tk is on the opening (.
     * Look ahead and return token that is past the closing ).
*/
    final Token* peekPastParen(Token* tk)
    {
        //printf("peekPastParen()\n");
        int parenDepth = 1; // the opening ( that tk sits on is already counted
        int braceDepth = 0;
        for (;;)
        {
            tk = peek(tk);
            //tk.print();
            const kind = tk.value;
            if (kind == TOK.leftParenthesis)
            {
                ++parenDepth;
            }
            else if (kind == TOK.rightParenthesis)
            {
                // Matching ) found: the result is the token after it.
                if (--parenDepth == 0)
                    return peek(tk);
            }
            else if (kind == TOK.leftCurly)
            {
                ++braceDepth;
            }
            else if (kind == TOK.rightCurly)
            {
                // A } with no matching { inside the parentheses ends the search.
                if (--braceDepth < 0)
                    return tk;
            }
            else if (kind == TOK.semicolon)
            {
                // A ; outside of any { } ends the search.
                if (!braceDepth)
                    return tk;
            }
            else if (kind == TOK.endOfFile)
            {
                return tk;
            }
        }
    }

    /*******************************************
     * Parse the escape sequence at the current position `p`, using the
     * current token's location for error messages. `p` is passed by
     * reference to the static overload.
     */
    private uint escapeSequence()
    {
        const dchar decoded = Lexer.escapeSequence(token.loc, p, Ccompile);
        return decoded;
    }

    /********
     * Parse the given string literal escape sequence into a single character.
     * D https://dlang.org/spec/lex.html#escape_sequences
     * C11 6.4.4.4
     * Params:
     *  loc = location to use for error messages
     *  sequence = pointer to string with escape sequence to parse.
Updated to 1182 1.1 mrg * point past the end of the escape sequence 1183 1.1 mrg * Ccompile = true for compile C11 escape sequences 1184 1.1 mrg * Returns: 1185 1.1 mrg * the escape sequence as a single character 1186 1.1 mrg */ 1187 1.1 mrg private static dchar escapeSequence(const ref Loc loc, ref const(char)* sequence, bool Ccompile) 1188 1.1 mrg { 1189 1.1 mrg const(char)* p = sequence; // cache sequence reference on stack 1190 1.1 mrg scope(exit) sequence = p; 1191 1.1 mrg 1192 1.1 mrg uint c = *p; 1193 1.1 mrg int ndigits; 1194 1.1 mrg switch (c) 1195 1.1 mrg { 1196 1.1 mrg case '\'': 1197 1.1 mrg case '"': 1198 1.1 mrg case '?': 1199 1.1 mrg case '\\': 1200 1.1 mrg Lconsume: 1201 1.1 mrg p++; 1202 1.1 mrg break; 1203 1.1 mrg case 'a': 1204 1.1 mrg c = 7; 1205 1.1 mrg goto Lconsume; 1206 1.1 mrg case 'b': 1207 1.1 mrg c = 8; 1208 1.1 mrg goto Lconsume; 1209 1.1 mrg case 'f': 1210 1.1 mrg c = 12; 1211 1.1 mrg goto Lconsume; 1212 1.1 mrg case 'n': 1213 1.1 mrg c = 10; 1214 1.1 mrg goto Lconsume; 1215 1.1 mrg case 'r': 1216 1.1 mrg c = 13; 1217 1.1 mrg goto Lconsume; 1218 1.1 mrg case 't': 1219 1.1 mrg c = 9; 1220 1.1 mrg goto Lconsume; 1221 1.1 mrg case 'v': 1222 1.1 mrg c = 11; 1223 1.1 mrg goto Lconsume; 1224 1.1 mrg case 'u': 1225 1.1 mrg ndigits = 4; 1226 1.1 mrg goto Lhex; 1227 1.1 mrg case 'U': 1228 1.1 mrg ndigits = 8; 1229 1.1 mrg goto Lhex; 1230 1.1 mrg case 'x': 1231 1.1 mrg ndigits = 2; 1232 1.1 mrg Lhex: 1233 1.1 mrg p++; 1234 1.1 mrg c = *p; 1235 1.1 mrg if (ishex(cast(char)c)) 1236 1.1 mrg { 1237 1.1 mrg uint v = 0; 1238 1.1 mrg int n = 0; 1239 1.1 mrg if (Ccompile && ndigits == 2) 1240 1.1 mrg { 1241 1.1 mrg /* C11 6.4.4.4-7 one to infinity hex digits 1242 1.1 mrg */ 1243 1.1 mrg do 1244 1.1 mrg { 1245 1.1 mrg if (isdigit(cast(char)c)) 1246 1.1 mrg c -= '0'; 1247 1.1 mrg else if (islower(c)) 1248 1.1 mrg c -= 'a' - 10; 1249 1.1 mrg else 1250 1.1 mrg c -= 'A' - 10; 1251 1.1 mrg v = v * 16 + c; 1252 1.1 mrg c = *++p; 1253 1.1 mrg } while 
(ishex(cast(char)c)); 1254 1.1 mrg } 1255 1.1 mrg else 1256 1.1 mrg { 1257 1.1 mrg while (1) 1258 1.1 mrg { 1259 1.1 mrg if (isdigit(cast(char)c)) 1260 1.1 mrg c -= '0'; 1261 1.1 mrg else if (islower(c)) 1262 1.1 mrg c -= 'a' - 10; 1263 1.1 mrg else 1264 1.1 mrg c -= 'A' - 10; 1265 1.1 mrg v = v * 16 + c; 1266 1.1 mrg c = *++p; 1267 1.1 mrg if (++n == ndigits) 1268 1.1 mrg break; 1269 1.1 mrg if (!ishex(cast(char)c)) 1270 1.1 mrg { 1271 1.1 mrg .error(loc, "escape hex sequence has %d hex digits instead of %d", n, ndigits); 1272 1.1 mrg break; 1273 1.1 mrg } 1274 1.1 mrg } 1275 1.1 mrg if (ndigits != 2 && !utf_isValidDchar(v)) 1276 1.1 mrg { 1277 1.1 mrg .error(loc, "invalid UTF character \\U%08x", v); 1278 1.1 mrg v = '?'; // recover with valid UTF character 1279 1.1 mrg } 1280 1.1 mrg } 1281 1.1 mrg c = v; 1282 1.1 mrg } 1283 1.1 mrg else 1284 1.1 mrg { 1285 1.1 mrg .error(loc, "undefined escape hex sequence \\%c%c", sequence[0], c); 1286 1.1 mrg p++; 1287 1.1 mrg } 1288 1.1 mrg break; 1289 1.1 mrg case '&': 1290 1.1 mrg if (Ccompile) 1291 1.1 mrg goto default; 1292 1.1 mrg 1293 1.1 mrg // named character entity 1294 1.1 mrg for (const idstart = ++p; 1; p++) 1295 1.1 mrg { 1296 1.1 mrg switch (*p) 1297 1.1 mrg { 1298 1.1 mrg case ';': 1299 1.1 mrg c = HtmlNamedEntity(idstart, p - idstart); 1300 1.1 mrg if (c == ~0) 1301 1.1 mrg { 1302 1.1 mrg .error(loc, "unnamed character entity &%.*s;", cast(int)(p - idstart), idstart); 1303 1.1 mrg c = '?'; 1304 1.1 mrg } 1305 1.1 mrg p++; 1306 1.1 mrg break; 1307 1.1 mrg default: 1308 1.1 mrg if (isalpha(*p) || (p != idstart && isdigit(*p))) 1309 1.1 mrg continue; 1310 1.1 mrg .error(loc, "unterminated named entity &%.*s;", cast(int)(p - idstart + 1), idstart); 1311 1.1 mrg c = '?'; 1312 1.1 mrg break; 1313 1.1 mrg } 1314 1.1 mrg break; 1315 1.1 mrg } 1316 1.1 mrg break; 1317 1.1 mrg case 0: 1318 1.1 mrg case 0x1A: 1319 1.1 mrg // end of file 1320 1.1 mrg c = '\\'; 1321 1.1 mrg break; 1322 1.1 mrg default: 1323 1.1 mrg if 
(isoctal(cast(char)c)) 1324 1.1 mrg { 1325 1.1 mrg uint v = 0; 1326 1.1 mrg int n = 0; 1327 1.1 mrg do 1328 1.1 mrg { 1329 1.1 mrg v = v * 8 + (c - '0'); 1330 1.1 mrg c = *++p; 1331 1.1 mrg } 1332 1.1 mrg while (++n < 3 && isoctal(cast(char)c)); 1333 1.1 mrg c = v; 1334 1.1 mrg if (c > 0xFF) 1335 1.1 mrg .error(loc, "escape octal sequence \\%03o is larger than \\377", c); 1336 1.1 mrg } 1337 1.1 mrg else 1338 1.1 mrg { 1339 1.1 mrg .error(loc, "undefined escape sequence \\%c", c); 1340 1.1 mrg p++; 1341 1.1 mrg } 1342 1.1 mrg break; 1343 1.1 mrg } 1344 1.1 mrg return c; 1345 1.1 mrg } 1346 1.1 mrg 1347 1.1 mrg /** 1348 1.1 mrg Lex a wysiwyg string. `p` must be pointing to the first character before the 1349 1.1 mrg contents of the string literal. The character pointed to by `p` will be used as 1350 1.1 mrg the terminating character (i.e. backtick or double-quote). 1351 1.1 mrg Params: 1352 1.1 mrg result = pointer to the token that accepts the result 1353 1.1 mrg */ 1354 1.1 mrg private void wysiwygStringConstant(Token* result) 1355 1.1 mrg { 1356 1.1 mrg result.value = TOK.string_; 1357 1.1 mrg Loc start = loc(); 1358 1.1 mrg auto terminator = p[0]; 1359 1.1 mrg p++; 1360 1.1 mrg stringbuffer.setsize(0); 1361 1.1 mrg while (1) 1362 1.1 mrg { 1363 1.1 mrg dchar c = p[0]; 1364 1.1 mrg p++; 1365 1.1 mrg switch (c) 1366 1.1 mrg { 1367 1.1 mrg case '\n': 1368 1.1 mrg endOfLine(); 1369 1.1 mrg break; 1370 1.1 mrg case '\r': 1371 1.1 mrg if (p[0] == '\n') 1372 1.1 mrg continue; // ignore 1373 1.1 mrg c = '\n'; // treat EndOfLine as \n character 1374 1.1 mrg endOfLine(); 1375 1.1 mrg break; 1376 1.1 mrg case 0: 1377 1.1 mrg case 0x1A: 1378 1.1 mrg error("unterminated string constant starting at %s", start.toChars()); 1379 1.1 mrg result.setString(); 1380 1.1 mrg // rewind `p` so it points to the EOF character 1381 1.1 mrg p--; 1382 1.1 mrg return; 1383 1.1 mrg default: 1384 1.1 mrg if (c == terminator) 1385 1.1 mrg { 1386 1.1 mrg result.setString(stringbuffer); 1387 1.1 
mrg stringPostfix(result); 1388 1.1 mrg return; 1389 1.1 mrg } 1390 1.1 mrg else if (c & 0x80) 1391 1.1 mrg { 1392 1.1 mrg p--; 1393 1.1 mrg const u = decodeUTF(); 1394 1.1 mrg p++; 1395 1.1 mrg if (u == PS || u == LS) 1396 1.1 mrg endOfLine(); 1397 1.1 mrg stringbuffer.writeUTF8(u); 1398 1.1 mrg continue; 1399 1.1 mrg } 1400 1.1 mrg break; 1401 1.1 mrg } 1402 1.1 mrg stringbuffer.writeByte(c); 1403 1.1 mrg } 1404 1.1 mrg } 1405 1.1 mrg 1406 1.1 mrg /** 1407 1.1 mrg Lex a delimited string. Some examples of delimited strings are: 1408 1.1 mrg --- 1409 1.1 mrg q"(foo(xxx))" // "foo(xxx)" 1410 1.1 mrg q"[foo$(LPAREN)]" // "foo$(LPAREN)" 1411 1.1 mrg q"/foo]/" // "foo]" 1412 1.1 mrg q"HERE 1413 1.1 mrg foo 1414 1.1 mrg HERE" // "foo\n" 1415 1.1 mrg --- 1416 1.1 mrg It is assumed that `p` points to the opening double-quote '"'. 1417 1.1 mrg Params: 1418 1.1 mrg result = pointer to the token that accepts the result 1419 1.1 mrg */ 1420 1.1 mrg private void delimitedStringConstant(Token* result) 1421 1.1 mrg { 1422 1.1 mrg result.value = TOK.string_; 1423 1.1 mrg Loc start = loc(); 1424 1.1 mrg dchar delimleft = 0; 1425 1.1 mrg dchar delimright = 0; 1426 1.1 mrg uint nest = 1; 1427 1.1 mrg uint nestcount = ~0; // dead assignment, needed to suppress warning 1428 1.1 mrg Identifier hereid = null; 1429 1.1 mrg uint blankrol = 0; 1430 1.1 mrg uint startline = 0; 1431 1.1 mrg p++; 1432 1.1 mrg stringbuffer.setsize(0); 1433 1.1 mrg while (1) 1434 1.1 mrg { 1435 1.1 mrg dchar c = *p++; 1436 1.1 mrg //printf("c = '%c'\n", c); 1437 1.1 mrg switch (c) 1438 1.1 mrg { 1439 1.1 mrg case '\n': 1440 1.1 mrg Lnextline: 1441 1.1 mrg endOfLine(); 1442 1.1 mrg startline = 1; 1443 1.1 mrg if (blankrol) 1444 1.1 mrg { 1445 1.1 mrg blankrol = 0; 1446 1.1 mrg continue; 1447 1.1 mrg } 1448 1.1 mrg if (hereid) 1449 1.1 mrg { 1450 1.1 mrg stringbuffer.writeUTF8(c); 1451 1.1 mrg continue; 1452 1.1 mrg } 1453 1.1 mrg break; 1454 1.1 mrg case '\r': 1455 1.1 mrg if (*p == '\n') 1456 1.1 mrg continue; 
// ignore 1457 1.1 mrg c = '\n'; // treat EndOfLine as \n character 1458 1.1 mrg goto Lnextline; 1459 1.1 mrg case 0: 1460 1.1 mrg case 0x1A: 1461 1.1 mrg error("unterminated delimited string constant starting at %s", start.toChars()); 1462 1.1 mrg result.setString(); 1463 1.1 mrg // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token). 1464 1.1 mrg p--; 1465 1.1 mrg return; 1466 1.1 mrg default: 1467 1.1 mrg if (c & 0x80) 1468 1.1 mrg { 1469 1.1 mrg p--; 1470 1.1 mrg c = decodeUTF(); 1471 1.1 mrg p++; 1472 1.1 mrg if (c == PS || c == LS) 1473 1.1 mrg goto Lnextline; 1474 1.1 mrg } 1475 1.1 mrg break; 1476 1.1 mrg } 1477 1.1 mrg if (delimleft == 0) 1478 1.1 mrg { 1479 1.1 mrg delimleft = c; 1480 1.1 mrg nest = 1; 1481 1.1 mrg nestcount = 1; 1482 1.1 mrg if (c == '(') 1483 1.1 mrg delimright = ')'; 1484 1.1 mrg else if (c == '{') 1485 1.1 mrg delimright = '}'; 1486 1.1 mrg else if (c == '[') 1487 1.1 mrg delimright = ']'; 1488 1.1 mrg else if (c == '<') 1489 1.1 mrg delimright = '>'; 1490 1.1 mrg else if (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) 1491 1.1 mrg { 1492 1.1 mrg // Start of identifier; must be a heredoc 1493 1.1 mrg Token tok; 1494 1.1 mrg p--; 1495 1.1 mrg scan(&tok); // read in heredoc identifier 1496 1.1 mrg if (tok.value != TOK.identifier) 1497 1.1 mrg { 1498 1.1 mrg error("identifier expected for heredoc, not %s", tok.toChars()); 1499 1.1 mrg delimright = c; 1500 1.1 mrg } 1501 1.1 mrg else 1502 1.1 mrg { 1503 1.1 mrg hereid = tok.ident; 1504 1.1 mrg //printf("hereid = '%s'\n", hereid.toChars()); 1505 1.1 mrg blankrol = 1; 1506 1.1 mrg } 1507 1.1 mrg nest = 0; 1508 1.1 mrg } 1509 1.1 mrg else 1510 1.1 mrg { 1511 1.1 mrg delimright = c; 1512 1.1 mrg nest = 0; 1513 1.1 mrg if (isspace(c)) 1514 1.1 mrg error("delimiter cannot be whitespace"); 1515 1.1 mrg } 1516 1.1 mrg } 1517 1.1 mrg else 1518 1.1 mrg { 1519 1.1 mrg if (blankrol) 1520 1.1 mrg { 1521 1.1 mrg error("heredoc rest of 
line should be blank"); 1522 1.1 mrg blankrol = 0; 1523 1.1 mrg continue; 1524 1.1 mrg } 1525 1.1 mrg if (nest == 1) 1526 1.1 mrg { 1527 1.1 mrg if (c == delimleft) 1528 1.1 mrg nestcount++; 1529 1.1 mrg else if (c == delimright) 1530 1.1 mrg { 1531 1.1 mrg nestcount--; 1532 1.1 mrg if (nestcount == 0) 1533 1.1 mrg goto Ldone; 1534 1.1 mrg } 1535 1.1 mrg } 1536 1.1 mrg else if (c == delimright) 1537 1.1 mrg goto Ldone; 1538 1.1 mrg if (startline && (isalpha(c) || c == '_' || (c >= 0x80 && isUniAlpha(c))) && hereid) 1539 1.1 mrg { 1540 1.1 mrg Token tok; 1541 1.1 mrg auto psave = p; 1542 1.1 mrg p--; 1543 1.1 mrg scan(&tok); // read in possible heredoc identifier 1544 1.1 mrg //printf("endid = '%s'\n", tok.ident.toChars()); 1545 1.1 mrg if (tok.value == TOK.identifier && tok.ident is hereid) 1546 1.1 mrg { 1547 1.1 mrg /* should check that rest of line is blank 1548 1.1 mrg */ 1549 1.1 mrg goto Ldone; 1550 1.1 mrg } 1551 1.1 mrg p = psave; 1552 1.1 mrg } 1553 1.1 mrg stringbuffer.writeUTF8(c); 1554 1.1 mrg startline = 0; 1555 1.1 mrg } 1556 1.1 mrg } 1557 1.1 mrg Ldone: 1558 1.1 mrg if (*p == '"') 1559 1.1 mrg p++; 1560 1.1 mrg else if (hereid) 1561 1.1 mrg error("delimited string must end in `%s\"`", hereid.toChars()); 1562 1.1 mrg else if (isspace(delimright)) 1563 1.1 mrg error("delimited string must end in `\"`"); 1564 1.1 mrg else 1565 1.1 mrg error("delimited string must end in `%c\"`", delimright); 1566 1.1 mrg result.setString(stringbuffer); 1567 1.1 mrg stringPostfix(result); 1568 1.1 mrg } 1569 1.1 mrg 1570 1.1 mrg /** 1571 1.1 mrg Lex a token string. Some examples of token strings are: 1572 1.1 mrg --- 1573 1.1 mrg q{ foo(xxx) } // " foo(xxx) " 1574 1.1 mrg q{foo$(LPAREN)} // "foo$(LPAREN)" 1575 1.1 mrg q{{foo}"}"} // "{foo}"}"" 1576 1.1 mrg --- 1577 1.1 mrg It is assumed that `p` points to the opening curly-brace. 
Params:
        result = token that receives the text between the outermost braces
    */
    private void tokenStringConstant(Token* result)
    {
        result.value = TOK.string_;

        const start = loc();
        const pstart = ++p;     // first character after the opening `{`
        ++inTokenStringConstant;
        scope(exit) --inTokenStringConstant;

        // `depth` counts the currently open braces, starting with the outer one.
        for (uint depth = 1;;)
        {
            Token tok;
            scan(&tok);
            if (tok.value == TOK.endOfFile)
            {
                error("unterminated token string constant starting at %s", start.toChars());
                result.setString();
                return;
            }
            if (tok.value == TOK.leftCurly)
                ++depth;
            else if (tok.value == TOK.rightCurly && --depth == 0)
            {
                // `p` is one past the closing `}`, which is not part of the string.
                result.setString(pstart, p - 1 - pstart);
                stringPostfix(result);
                return;
            }
        }
    }

    /**
    Scan a quoted string while building the processed string value by
    handling escape sequences. The result is returned in the given `t` token.
    This function assumes that `p` currently points to the opening quote
    of the string.
Params:
        t = the token to set the resulting string to
     * In ImportC mode (`Ccompile`) a line break inside the literal is reported
     * as an unterminated string and no string postfix is parsed.
     * References:
     *   D https://dlang.org/spec/lex.html#double_quoted_strings
     *   ImportC C11 6.4.5
     */
    private void escapeStringConstant(Token* t)
    {
        t.value = TOK.string_;

        const start = loc();
        const tc = *p++;        // opening quote
        stringbuffer.setsize(0);
        while (1)
        {
            dchar c = *p++;
            switch (c)
            {
            case '\\':
                switch (*p)
                {
                case '&':
                    if (Ccompile)
                        goto default;
                    goto case;

                case 'u':
                case 'U':
                    // These produce a code point, which is stored UTF-8 encoded.
                    c = escapeSequence();
                    stringbuffer.writeUTF8(c);
                    continue;
                default:
                    // Everything else is stored as a single byte below.
                    c = escapeSequence();
                    break;
                }
                break;
            case '\n':
                endOfLine();
                if (Ccompile)
                    goto Lunterminated;
                break;
            case '\r':
                if (*p == '\n')
                    continue; // ignore
                c = '\n'; // treat EndOfLine as \n character
                endOfLine();
                if (Ccompile)
                    goto Lunterminated;
                break;
            case '\'':
            case '"':
                // Only the quote kind that opened the literal terminates it.
                if (c != tc)
                    goto default;
                t.setString(stringbuffer);
                if (!Ccompile)
                    stringPostfix(t);
                return;
            case 0:
            case 0x1A:
                // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
                p--;
            Lunterminated:
                error("unterminated string constant starting at %s", start.toChars());
                t.setString();
                return;
            default:
                if (c & 0x80)
                {
                    p--;
                    c = decodeUTF();
                    if (c == LS || c == PS)
                    {
                        c = '\n';
                        endOfLine();
                        if (Ccompile)
                            goto Lunterminated;
                    }
                    p++;
                    stringbuffer.writeUTF8(c);
                    continue;
                }
                break;
            }
            stringbuffer.writeByte(c);
        }
    }

    /**************************************
     * Lex a D character literal. `p` points to the opening single quote.
     * The value is stored in `t.unsvalue`; after any error it is set to '?'.
     * Params:
     *  t = token to fill in
     * Returns:
     *  `TOK.charLiteral`, `TOK.wcharLiteral` or `TOK.dcharLiteral`
     * Reference:
     *  https://dlang.org/spec/lex.html#characterliteral
     */
    private TOK charConstant(Token* t)
    {
        TOK tk = TOK.charLiteral;
        //printf("Lexer::charConstant\n");
        p++;
        dchar c = *p++;
        switch (c)
        {
        case '\\':
            switch (*p)
            {
            case 'u':
                t.unsvalue = escapeSequence();
                tk = TOK.wcharLiteral;
                break;
            case 'U':
            case '&':
                t.unsvalue = escapeSequence();
                tk = TOK.dcharLiteral;
                break;
            default:
                t.unsvalue = escapeSequence();
                break;
            }
            break;
        case '\n':
        L1:
            endOfLine();
            goto case;
        case '\r':
            goto case '\'';
        case 0:
        case 0x1A:
            // decrement `p`, because it needs to point to the next token (the 0 or 0x1A character is the TOK.endOfFile token).
            p--;
            goto case;
        case '\'':
            error("unterminated character constant");
            t.unsvalue = '?';
            return tk;
        default:
            if (c & 0x80)
            {
                p--;
                c = decodeUTF();
                p++;
                if (c == LS || c == PS)
                    goto L1;
                // Outside the surrogate range and below 0xFFFE the value is typed as wchar.
                if (c < 0xD800 || (c >= 0xE000 && c < 0xFFFE))
                    tk = TOK.wcharLiteral;
                else
                    tk = TOK.dcharLiteral;
            }
            t.unsvalue = c;
            break;
        }
        if (*p != '\'')
        {
            // Error recovery: skip ahead to a closing quote or a likely end of the literal.
            while (*p != '\'' && *p != 0x1A && *p != 0 && *p != '\n' &&
                    *p != '\r' && *p != ';' && *p != ')' && *p != ']' && *p != '}')
            {
                if (*p & 0x80)
                {
                    const s = p;
                    c = decodeUTF();
                    if (c == LS || c == PS)
                    {
                        p = s;
                        break;
                    }
                }
                p++;
            }

            if (*p == '\'')
            {
                error("character constant has multiple characters");
                p++;
            }
            else
                error("unterminated character constant");
            t.unsvalue = '?';
            return tk;
        }
        p++;
        return tk;
    }

    /***************************************
     * Lex C character constant.
     * Parser is on the opening quote.
     *
     * The literal is first lexed as a string; its bytes are then folded into
     * one integer stored in `t.unsvalue`. Without a prefix, up to four bytes are
     * packed with the first character most significant, using shifts so the
     * result does not depend on the byte order of the host. With `u`, one or two
     * code points that each fit in 16 bits are accepted. With `U`, exactly one
     * code point is accepted.
     * Params:
     *  t = token to fill in
     *  prefix = one of `u`, `U` or 0.
     * Reference:
     *  C11 6.4.4.4
     */
    private void clexerCharConstant(ref Token t, char prefix)
    {
        escapeStringConstant(&t);
        const(char)[] str = t.ustring[0 .. t.len];
        const n = str.length;
        const loc = t.loc;
        if (n == 0)
        {
            error(loc, "empty character constant");
            t.value = TOK.semicolon;
            return;
        }

        uint u;
        switch (prefix)
        {
            case 0:
                if (n == 1) // fast case
                {
                    u = str[0];
                }
                else if (n > 4)
                    error(loc, "max number of chars in character literal is 4, had %d",
                        cast(int)n);
                else
                {
                    // 'ab' == ('a' << 8) | 'b'
                    foreach (ch; str)
                        u = (u << 8) | ch;
                }
                break;

            case 'u':
                dchar d1;
                size_t idx;
                auto msg = utf_decodeChar(str, idx, d1);
                dchar d2 = 0;
                if (idx < n && !msg)
                    msg = utf_decodeChar(str, idx, d2);
                // NOTE(review): `msg` is passed to `%s` as is; confirm that
                // utf_decodeChar returns a NUL-terminated C string here.
                if (msg)
                    error(loc, "%s", msg);
                else if (idx < n)
                    error(loc, "max number of chars in 16 bit character literal is 2, had %d",
                        cast(int)((n + 1) >> 1));
                else if (d1 >= 0x1_0000)
                    error(loc, "%d does not fit in 16 bits", d1);
                else if (d2 >= 0x1_0000)
                    error(loc, "%d does not fit in 16 bits", d2);
                u = d1;
                if (d2)
                    u = (d1 << 16) | d2;
                break;

            case 'U':
                dchar d;
                size_t idx;
                auto msg = utf_decodeChar(str, idx, d);
                if (msg)
                    error(loc, "%s", msg);
                else if (idx < n)
                    error(loc, "max number of chars in 32 bit character literal is 1, had %d",
                        cast(int)((n + 3) >> 2));
                u = d;
                break;

            default:
                assert(0);
        }
        t.value = n == 1 ? TOK.charLiteral : TOK.int32Literal;
        t.unsvalue = u;
    }

    /***************************************
     * Get postfix of string literal.
     * If `p` is on `c`, `w` or `d`, store it in `t.postfix` and step past it;
     * otherwise set `t.postfix` to 0 and leave `p` alone.
     */
    private void stringPostfix(Token* t) pure @nogc
    {
        switch (*p)
        {
        case 'c':
        case 'w':
        case 'd':
            t.postfix = *p;
            p++;
            break;
        default:
            t.postfix = 0;
            break;
        }
    }

    /**************************************
     * Read in a number.
     * If it's an integer, store its value in `t.unsvalue`.
     *      integers can be decimal, octal, hex or binary
     *      Handle the suffixes U, UL, LU, L, etc.
     *      (in ImportC mode the suffixes are handled by `cnumber`)
     * If it turns out to be a floating point literal, rewind `p` to the start
     * of the number and let `inreal` lex it.
     * Params:
     *  t = token to fill in
     * Returns:
     *  the token kind of the literal, e.g. `TOK.int32Literal`,
     *  `TOK.uns64Literal`, or whatever `inreal`/`cnumber` returns
     */
    private TOK number(Token* t)
    {
        int base = 10;
        const start = p;
        uinteger_t n = 0; // unsigned >=64 bit integer type
        int d;
        bool err = false;
        bool overflow = false;
        bool anyBinaryDigitsNoSingleUS = false;
        bool anyHexDigitsNoSingleUS = false;
        char errorDigit = 0;
        dchar c = *p;
        if (c == '0')
        {
            // A leading 0 selects the base from the character that follows it.
            ++p;
            c = *p;
            switch (c)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                base = 8;
                break;

            case '8':
            case '9':
                errorDigit = cast(char) c;
                base = 8;
                break;
            case 'x':
            case 'X':
                ++p;
                base = 16;
                break;
            case 'b':
            case 'B':
                if (Ccompile)
                    error("binary constants not allowed");
                ++p;
                base = 2;
                break;
            case '.':
                if (p[1] == '.')
                    goto Ldone; // if ".."
                if (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80)
                {
                    if (Ccompile && (p[1] == 'f' || p[1] == 'F' || p[1] == 'l' || p[1] == 'L'))
                        goto Lreal;  // if `0.f` or `0.L`
                    goto Ldone; // if ".identifier" or ".unicode"
                }
                goto Lreal; // '.' is part of current token
            case 'i':
            case 'f':
            case 'F':
                goto Lreal;
            case '_':
                if (Ccompile)
                    error("embedded `_` not allowed");
                ++p;
                base = 8;
                break;
            case 'L':
                if (p[1] == 'i')
                    goto Lreal;
                break;
            default:
                break;
            }
        }
        while (1)
        {
            c = *p;
            switch (c)
            {
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                ++p;
                d = c - '0';
                break;
            case 'a':
            case 'b':
            case 'c':
            case 'd':
            case 'e':
            case 'f':
            case 'A':
            case 'B':
            case 'C':
            case 'D':
            case 'E':
            case 'F':
                ++p;
                if (base != 16)
                {
                    // Outside hex, an exponent or float suffix means this is a real.
                    if (c == 'e' || c == 'E' || c == 'f' || c == 'F')
                        goto Lreal;
                }
                if (c >= 'a')
                    d = c + 10 - 'a';
                else
                    d = c + 10 - 'A';
                break;
            case 'L':
                if (p[1] == 'i')
                    goto Lreal;
                goto Ldone;
            case '.':
                if (p[1] == '.')
                    goto Ldone; // if ".."
                if (base <= 10 && n > 0 && (isalpha(p[1]) || p[1] == '_' || p[1] & 0x80))
                {
                    if (Ccompile && base == 10 &&
                        (p[1] == 'e' || p[1] == 'E' || p[1] == 'f' || p[1] == 'F' || p[1] == 'l' || p[1] == 'L'))
                        goto Lreal;  // if `1.e6` or `1.f` or `1.L`
                    goto Ldone; // if ".identifier" or ".unicode"
                }
                if (base == 16 && (!ishex(p[1]) || p[1] == '_' || p[1] & 0x80))
                    goto Ldone; // if ".identifier" or ".unicode"
                if (base == 2)
                    goto Ldone; // if ".identifier" or ".unicode"
                goto Lreal; // otherwise as part of a floating point literal
            case 'p':
            case 'P':
            case 'i':
            Lreal:
                // Re-lex the whole literal as a floating point number.
                p = start;
                return inreal(t);
            case '_':
                if (Ccompile)
                    goto default;
                ++p;
                continue;
            default:
                goto Ldone;
            }
            // got a digit here, set any necessary flags, check for errors
            anyHexDigitsNoSingleUS = true;
            anyBinaryDigitsNoSingleUS = true;
            if (!errorDigit && d >= base)
            {
                errorDigit = cast(char) c;
            }
            // Avoid expensive overflow check if we aren't at risk of overflow
            if (n <= 0x0FFF_FFFF_FFFF_FFFFUL)
                n = n * base + d;
            else
            {
                import core.checkedint : mulu, addu;

                n = mulu(n, base, overflow);
                n = addu(n, d, overflow);
            }
        }
    Ldone:
        if (errorDigit)
        {
            error("%s digit expected, not `%c`", base == 2 ? "binary".ptr :
                                                 base == 8 ? "octal".ptr :
                                                 "decimal".ptr, errorDigit);
            err = true;
        }
        if (overflow && !err)
        {
            error("integer overflow");
            err = true;
        }
        if ((base == 2 && !anyBinaryDigitsNoSingleUS) ||
            (base == 16 && !anyHexDigitsNoSingleUS))
            error("`%.*s` isn't a valid integer literal, use `%.*s0` instead", cast(int)(p - start), start, 2, start);

        t.unsvalue = n;

        if (Ccompile)
            return cnumber(base, n);

        enum FLAGS : int
        {
            none = 0,
            decimal = 1, // decimal
            unsigned = 2, // u or U suffix
            long_ = 4, // L suffix
        }

        FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.none;
        // Parse trailing 'u', 'U', 'l' or 'L' in any combination
        const psuffix = p;
        while (1)
        {
            FLAGS f;
            switch (*p)
            {
            case 'U':
            case 'u':
                f = FLAGS.unsigned;
                goto L1;
            case 'l':
                f = FLAGS.long_;
                error("lower case integer suffix 'l' is not allowed. Please use 'L' instead");
                goto L1;
            case 'L':
                f = FLAGS.long_;
            L1:
                p++;
                // The same suffix given twice is an error.
                if ((flags & f) && !err)
                {
                    error("unrecognized token");
                    err = true;
                }
                flags = cast(FLAGS)(flags | f);
                continue;
            default:
                break;
            }
            break;
        }
        if (base == 8 && n >= 8)
        {
            if (err)
                // can't translate invalid octal value, just show a generic message
                error("octal literals larger than 7 are no longer supported");
            else
                error("octal literals `0%llo%.*s` are no longer supported, use `std.conv.octal!\"%llo%.*s\"` instead",
                    n, cast(int)(p - psuffix), psuffix, n, cast(int)(p - psuffix), psuffix);
        }
        TOK result;
        switch (flags)
        {
        case FLAGS.none:
            /* Octal or Hexadecimal constant.
             * First that fits: int, uint, long, ulong
             */
            if (n & 0x8000000000000000L)
                result = TOK.uns64Literal;
            else if (n & 0xFFFFFFFF00000000L)
                result = TOK.int64Literal;
            else if (n & 0x80000000)
                result = TOK.uns32Literal;
            else
                result = TOK.int32Literal;
            break;
        case FLAGS.decimal:
            /* First that fits: int, long, long long
             */
            if (n & 0x8000000000000000L)
            {
                result = TOK.uns64Literal;
            }
            else if (n & 0xFFFFFFFF80000000L)
                result = TOK.int64Literal;
            else
                result = TOK.int32Literal;
            break;
        case FLAGS.unsigned:
        case FLAGS.decimal | FLAGS.unsigned:
            /* First that fits: uint, ulong
             */
            if (n & 0xFFFFFFFF00000000L)
                result = TOK.uns64Literal;
            else
                result = TOK.uns32Literal;
            break;
        case FLAGS.decimal | FLAGS.long_:
            if (n & 0x8000000000000000L)
            {
                if (!err)
                {
                    error("signed integer overflow");
                    err = true;
                }
                result = TOK.uns64Literal;
            }
            else
                result = TOK.int64Literal;
            break;
        case FLAGS.long_:
            if (n & 0x8000000000000000L)
                result = TOK.uns64Literal;
            else
                result = TOK.int64Literal;
            break;
        case FLAGS.unsigned | FLAGS.long_:
        case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_:
            result = TOK.uns64Literal;
            break;
        default:
            debug
            {
                printf("%x\n", flags);
            }
            assert(0);
        }
        return result;
    }

1.1 mrg 2217 1.1 mrg /************************************** 2218 1.1 mrg * Lex C integer-suffix 2219 1.1 mrg * Params: 2220 1.1 mrg * base = number base 2221 1.1 mrg * n = raw integer value 2222 1.1 mrg * Returns: 2223 1.1 mrg * token value 2224 1.1 mrg */ 2225 1.1 mrg private TOK cnumber(int base, uinteger_t n) 2226 1.1 mrg { 2227 1.1 mrg /* C11 6.4.4.1 2228 1.1 mrg * Parse trailing suffixes: 2229 1.1 mrg * u or U 2230 1.1 mrg * l or L 2231 1.1 mrg * ll or LL 2232 1.1 mrg */ 2233 1.1 mrg enum FLAGS : uint 2234 1.1 mrg { 2235 1.1 mrg octalhex = 1, // octal or hexadecimal 2236 1.1 mrg decimal = 2, // decimal 2237 1.1 mrg unsigned = 4, // u or U suffix 2238 1.1 mrg long_ = 8, // l or L suffix 2239 1.1 mrg llong = 0x10 // ll or LL 2240 1.1 mrg } 2241 1.1 mrg FLAGS flags = (base == 10) ? FLAGS.decimal : FLAGS.octalhex; 2242 1.1 mrg bool err; 2243 1.1 mrg Lsuffixes: 2244 1.1 mrg while (1) 2245 1.1 mrg { 2246 1.1 mrg FLAGS f; 2247 1.1 mrg const cs = *p; 2248 1.1 mrg switch (cs) 2249 1.1 mrg { 2250 1.1 mrg case 'U': 2251 1.1 mrg case 'u': 2252 1.1 mrg f = FLAGS.unsigned; 2253 1.1 mrg break; 2254 1.1 mrg 2255 1.1 mrg case 'l': 2256 1.1 mrg case 'L': 2257 1.1 mrg f = FLAGS.long_; 2258 1.1 mrg if (cs == p[1]) 2259 1.1 mrg { 2260 1.1 mrg f = FLAGS.long_ | FLAGS.llong; 2261 1.1 mrg ++p; 2262 1.1 mrg } 2263 1.1 mrg break; 2264 1.1 mrg 2265 1.1 mrg default: 2266 1.1 mrg break Lsuffixes; 2267 1.1 mrg } 2268 1.1 mrg ++p; 2269 1.1 mrg if ((flags & f) && !err) 2270 1.1 mrg { 2271 1.1 mrg error("duplicate integer suffixes"); 2272 1.1 mrg err = true; 2273 1.1 mrg } 2274 1.1 mrg flags = cast(FLAGS)(flags | f); 2275 1.1 mrg } 2276 1.1 mrg 2277 1.1 mrg TOK result = TOK.int32Literal; // default 2278 1.1 mrg switch (flags) 2279 1.1 mrg { 2280 1.1 mrg /* Since D doesn't have a variable sized `long` or `unsigned long` type, 2281 1.1 mrg * this code deviates from C by picking D int, uint, long, or ulong instead 2282 1.1 mrg */ 2283 1.1 mrg 2284 1.1 mrg case FLAGS.octalhex: 2285 1.1 mrg /* 
Octal or Hexadecimal constant. 2286 1.1 mrg * First that fits: int, unsigned, long, unsigned long, 2287 1.1 mrg * long long, unsigned long long 2288 1.1 mrg */ 2289 1.1 mrg if (n & 0x8000000000000000L) 2290 1.1 mrg result = TOK.uns64Literal; // unsigned long 2291 1.1 mrg else if (n & 0xFFFFFFFF00000000L) 2292 1.1 mrg result = TOK.int64Literal; // long 2293 1.1 mrg else if (n & 0x80000000) 2294 1.1 mrg result = TOK.uns32Literal; 2295 1.1 mrg else 2296 1.1 mrg result = TOK.int32Literal; 2297 1.1 mrg break; 2298 1.1 mrg 2299 1.1 mrg case FLAGS.decimal: 2300 1.1 mrg /* First that fits: int, long, long long 2301 1.1 mrg */ 2302 1.1 mrg if (n & 0x8000000000000000L) 2303 1.1 mrg result = TOK.uns64Literal; // unsigned long 2304 1.1 mrg else if (n & 0xFFFFFFFF80000000L) 2305 1.1 mrg result = TOK.int64Literal; // long 2306 1.1 mrg else 2307 1.1 mrg result = TOK.int32Literal; 2308 1.1 mrg break; 2309 1.1 mrg 2310 1.1 mrg case FLAGS.octalhex | FLAGS.unsigned: 2311 1.1 mrg case FLAGS.decimal | FLAGS.unsigned: 2312 1.1 mrg /* First that fits: unsigned, unsigned long, unsigned long long 2313 1.1 mrg */ 2314 1.1 mrg if (n & 0xFFFFFFFF00000000L) 2315 1.1 mrg result = TOK.uns64Literal; // unsigned long 2316 1.1 mrg else 2317 1.1 mrg result = TOK.uns32Literal; 2318 1.1 mrg break; 2319 1.1 mrg 2320 1.1 mrg case FLAGS.decimal | FLAGS.long_: 2321 1.1 mrg /* First that fits: long, long long 2322 1.1 mrg */ 2323 1.1 mrg if (longsize == 4 || long_longsize == 4) 2324 1.1 mrg { 2325 1.1 mrg if (n & 0xFFFFFFFF_80000000L) 2326 1.1 mrg result = TOK.int64Literal; 2327 1.1 mrg else 2328 1.1 mrg result = TOK.int32Literal; // long 2329 1.1 mrg } 2330 1.1 mrg else 2331 1.1 mrg { 2332 1.1 mrg result = TOK.int64Literal; // long 2333 1.1 mrg } 2334 1.1 mrg break; 2335 1.1 mrg 2336 1.1 mrg case FLAGS.octalhex | FLAGS.long_: 2337 1.1 mrg /* First that fits: long, unsigned long, long long, 2338 1.1 mrg * unsigned long long 2339 1.1 mrg */ 2340 1.1 mrg if (longsize == 4 || long_longsize == 4) 2341 1.1 mrg 
{ 2342 1.1 mrg if (n & 0x8000000000000000L) 2343 1.1 mrg result = TOK.uns64Literal; 2344 1.1 mrg else if (n & 0xFFFFFFFF00000000L) 2345 1.1 mrg result = TOK.int64Literal; 2346 1.1 mrg else if (n & 0x80000000) 2347 1.1 mrg result = TOK.uns32Literal; // unsigned long 2348 1.1 mrg else 2349 1.1 mrg result = TOK.int32Literal; // long 2350 1.1 mrg } 2351 1.1 mrg else 2352 1.1 mrg { 2353 1.1 mrg if (n & 0x80000000_00000000L) 2354 1.1 mrg result = TOK.uns64Literal; // unsigned long 2355 1.1 mrg else 2356 1.1 mrg result = TOK.int64Literal; // long 2357 1.1 mrg } 2358 1.1 mrg break; 2359 1.1 mrg 2360 1.1 mrg case FLAGS.octalhex | FLAGS.unsigned | FLAGS.long_: 2361 1.1 mrg case FLAGS.decimal | FLAGS.unsigned | FLAGS.long_: 2362 1.1 mrg /* First that fits: unsigned long, unsigned long long 2363 1.1 mrg */ 2364 1.1 mrg if (longsize == 4 || long_longsize == 4) 2365 1.1 mrg { 2366 1.1 mrg if (n & 0xFFFFFFFF00000000L) 2367 1.1 mrg result = TOK.uns64Literal; 2368 1.1 mrg else 2369 1.1 mrg result = TOK.uns32Literal; // unsigned long 2370 1.1 mrg } 2371 1.1 mrg else 2372 1.1 mrg { 2373 1.1 mrg result = TOK.uns64Literal; // unsigned long 2374 1.1 mrg } 2375 1.1 mrg break; 2376 1.1 mrg 2377 1.1 mrg case FLAGS.octalhex | FLAGS.long_ | FLAGS.llong: 2378 1.1 mrg /* First that fits: long long, unsigned long long 2379 1.1 mrg */ 2380 1.1 mrg if (n & 0x8000000000000000L) 2381 1.1 mrg result = TOK.uns64Literal; 2382 1.1 mrg else 2383 1.1 mrg result = TOK.int64Literal; 2384 1.1 mrg break; 2385 1.1 mrg 2386 1.1 mrg case FLAGS.decimal | FLAGS.long_ | FLAGS.llong: 2387 1.1 mrg /* long long 2388 1.1 mrg */ 2389 1.1 mrg result = TOK.int64Literal; 2390 1.1 mrg break; 2391 1.1 mrg 2392 1.1 mrg case FLAGS.octalhex | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong: 2393 1.1 mrg case FLAGS.decimal | FLAGS.long_ | FLAGS.unsigned | FLAGS.llong: 2394 1.1 mrg result = TOK.uns64Literal; 2395 1.1 mrg break; 2396 1.1 mrg 2397 1.1 mrg default: 2398 1.1 mrg debug printf("%x\n",flags); 2399 1.1 mrg assert(0); 2400 
1.1 mrg } 2401 1.1 mrg return result; 2402 1.1 mrg } 2403 1.1 mrg 2404 1.1 mrg /************************************** 2405 1.1 mrg * Read in characters, converting them to real. 2406 1.1 mrg * Bugs: 2407 1.1 mrg * Exponent overflow not detected. 2408 1.1 mrg * Too much requested precision is not detected. 2409 1.1 mrg */ 2410 1.1 mrg private TOK inreal(Token* t) 2411 1.1 mrg { 2412 1.1 mrg //printf("Lexer::inreal()\n"); 2413 1.1 mrg debug 2414 1.1 mrg { 2415 1.1 mrg assert(*p == '.' || isdigit(*p)); 2416 1.1 mrg } 2417 1.1 mrg bool isWellformedString = true; 2418 1.1 mrg stringbuffer.setsize(0); 2419 1.1 mrg auto pstart = p; 2420 1.1 mrg bool hex = false; 2421 1.1 mrg dchar c = *p++; 2422 1.1 mrg // Leading '0x' 2423 1.1 mrg if (c == '0') 2424 1.1 mrg { 2425 1.1 mrg c = *p++; 2426 1.1 mrg if (c == 'x' || c == 'X') 2427 1.1 mrg { 2428 1.1 mrg hex = true; 2429 1.1 mrg c = *p++; 2430 1.1 mrg } 2431 1.1 mrg } 2432 1.1 mrg // Digits to left of '.' 2433 1.1 mrg while (1) 2434 1.1 mrg { 2435 1.1 mrg if (c == '.') 2436 1.1 mrg { 2437 1.1 mrg c = *p++; 2438 1.1 mrg break; 2439 1.1 mrg } 2440 1.1 mrg if (isdigit(c) || (hex && isxdigit(c)) || c == '_') 2441 1.1 mrg { 2442 1.1 mrg c = *p++; 2443 1.1 mrg continue; 2444 1.1 mrg } 2445 1.1 mrg break; 2446 1.1 mrg } 2447 1.1 mrg // Digits to right of '.' 
2448 1.1 mrg while (1) 2449 1.1 mrg { 2450 1.1 mrg if (isdigit(c) || (hex && isxdigit(c)) || c == '_') 2451 1.1 mrg { 2452 1.1 mrg c = *p++; 2453 1.1 mrg continue; 2454 1.1 mrg } 2455 1.1 mrg break; 2456 1.1 mrg } 2457 1.1 mrg if (c == 'e' || c == 'E' || (hex && (c == 'p' || c == 'P'))) 2458 1.1 mrg { 2459 1.1 mrg c = *p++; 2460 1.1 mrg if (c == '-' || c == '+') 2461 1.1 mrg { 2462 1.1 mrg c = *p++; 2463 1.1 mrg } 2464 1.1 mrg bool anyexp = false; 2465 1.1 mrg while (1) 2466 1.1 mrg { 2467 1.1 mrg if (isdigit(c)) 2468 1.1 mrg { 2469 1.1 mrg anyexp = true; 2470 1.1 mrg c = *p++; 2471 1.1 mrg continue; 2472 1.1 mrg } 2473 1.1 mrg if (c == '_') 2474 1.1 mrg { 2475 1.1 mrg if (Ccompile) 2476 1.1 mrg error("embedded `_` in numeric literals not allowed"); 2477 1.1 mrg c = *p++; 2478 1.1 mrg continue; 2479 1.1 mrg } 2480 1.1 mrg if (!anyexp) 2481 1.1 mrg { 2482 1.1 mrg error("missing exponent"); 2483 1.1 mrg isWellformedString = false; 2484 1.1 mrg } 2485 1.1 mrg break; 2486 1.1 mrg } 2487 1.1 mrg } 2488 1.1 mrg else if (hex) 2489 1.1 mrg { 2490 1.1 mrg error("exponent required for hex float"); 2491 1.1 mrg isWellformedString = false; 2492 1.1 mrg } 2493 1.1 mrg --p; 2494 1.1 mrg while (pstart < p) 2495 1.1 mrg { 2496 1.1 mrg if (*pstart != '_') 2497 1.1 mrg stringbuffer.writeByte(*pstart); 2498 1.1 mrg ++pstart; 2499 1.1 mrg } 2500 1.1 mrg stringbuffer.writeByte(0); 2501 1.1 mrg auto sbufptr = cast(const(char)*)stringbuffer[].ptr; 2502 1.1 mrg TOK result; 2503 1.1 mrg bool isOutOfRange = false; 2504 1.1 mrg t.floatvalue = (isWellformedString ? 
CTFloat.parse(sbufptr, &isOutOfRange) : CTFloat.zero); 2505 1.1 mrg switch (*p) 2506 1.1 mrg { 2507 1.1 mrg case 'F': 2508 1.1 mrg case 'f': 2509 1.1 mrg if (isWellformedString && !isOutOfRange) 2510 1.1 mrg isOutOfRange = Port.isFloat32LiteralOutOfRange(sbufptr); 2511 1.1 mrg result = TOK.float32Literal; 2512 1.1 mrg p++; 2513 1.1 mrg break; 2514 1.1 mrg default: 2515 1.1 mrg if (isWellformedString && !isOutOfRange) 2516 1.1 mrg isOutOfRange = Port.isFloat64LiteralOutOfRange(sbufptr); 2517 1.1 mrg result = TOK.float64Literal; 2518 1.1 mrg break; 2519 1.1 mrg case 'l': 2520 1.1 mrg if (!Ccompile) 2521 1.1 mrg error("use 'L' suffix instead of 'l'"); 2522 1.1 mrg goto case 'L'; 2523 1.1 mrg case 'L': 2524 1.1 mrg ++p; 2525 1.1 mrg if (Ccompile && long_doublesize == 8) 2526 1.1 mrg goto default; 2527 1.1 mrg result = TOK.float80Literal; 2528 1.1 mrg break; 2529 1.1 mrg } 2530 1.1 mrg if ((*p == 'i' || *p == 'I') && !Ccompile) 2531 1.1 mrg { 2532 1.1 mrg if (*p == 'I') 2533 1.1 mrg error("use 'i' suffix instead of 'I'"); 2534 1.1 mrg p++; 2535 1.1 mrg switch (result) 2536 1.1 mrg { 2537 1.1 mrg case TOK.float32Literal: 2538 1.1 mrg result = TOK.imaginary32Literal; 2539 1.1 mrg break; 2540 1.1 mrg case TOK.float64Literal: 2541 1.1 mrg result = TOK.imaginary64Literal; 2542 1.1 mrg break; 2543 1.1 mrg case TOK.float80Literal: 2544 1.1 mrg result = TOK.imaginary80Literal; 2545 1.1 mrg break; 2546 1.1 mrg default: 2547 1.1 mrg break; 2548 1.1 mrg } 2549 1.1 mrg } 2550 1.1 mrg const isLong = (result == TOK.float80Literal || result == TOK.imaginary80Literal); 2551 1.1 mrg if (isOutOfRange && !isLong && (!Ccompile || hex)) 2552 1.1 mrg { 2553 1.1 mrg /* C11 6.4.4.2 doesn't actually care if it is not representable if it is not hex 2554 1.1 mrg */ 2555 1.1 mrg const char* suffix = (result == TOK.float32Literal || result == TOK.imaginary32Literal) ? 
"f" : ""; 2556 1.1 mrg error(scanloc, "number `%s%s` is not representable", sbufptr, suffix); 2557 1.1 mrg } 2558 1.1 mrg debug 2559 1.1 mrg { 2560 1.1 mrg switch (result) 2561 1.1 mrg { 2562 1.1 mrg case TOK.float32Literal: 2563 1.1 mrg case TOK.float64Literal: 2564 1.1 mrg case TOK.float80Literal: 2565 1.1 mrg case TOK.imaginary32Literal: 2566 1.1 mrg case TOK.imaginary64Literal: 2567 1.1 mrg case TOK.imaginary80Literal: 2568 1.1 mrg break; 2569 1.1 mrg default: 2570 1.1 mrg assert(0); 2571 1.1 mrg } 2572 1.1 mrg } 2573 1.1 mrg return result; 2574 1.1 mrg } 2575 1.1 mrg 2576 1.1 mrg final Loc loc() pure @nogc 2577 1.1 mrg { 2578 1.1 mrg scanloc.charnum = cast(uint)(1 + p - line); 2579 1.1 mrg version (LocOffset) 2580 1.1 mrg scanloc.fileOffset = cast(uint)(p - base); 2581 1.1 mrg return scanloc; 2582 1.1 mrg } 2583 1.1 mrg 2584 1.1 mrg final void error(const(char)* format, ...) 2585 1.1 mrg { 2586 1.1 mrg va_list args; 2587 1.1 mrg va_start(args, format); 2588 1.1 mrg .verror(token.loc, format, args); 2589 1.1 mrg va_end(args); 2590 1.1 mrg } 2591 1.1 mrg 2592 1.1 mrg final void error(const ref Loc loc, const(char)* format, ...) 2593 1.1 mrg { 2594 1.1 mrg va_list args; 2595 1.1 mrg va_start(args, format); 2596 1.1 mrg .verror(loc, format, args); 2597 1.1 mrg va_end(args); 2598 1.1 mrg } 2599 1.1 mrg 2600 1.1 mrg final void deprecation(const(char)* format, ...) 
2601 1.1 mrg { 2602 1.1 mrg va_list args; 2603 1.1 mrg va_start(args, format); 2604 1.1 mrg .vdeprecation(token.loc, format, args); 2605 1.1 mrg va_end(args); 2606 1.1 mrg } 2607 1.1 mrg 2608 1.1 mrg /*************************************** 2609 1.1 mrg * Parse special token sequence: 2610 1.1 mrg * Returns: 2611 1.1 mrg * true if the special token sequence was handled 2612 1.1 mrg * References: 2613 1.1 mrg * https://dlang.org/spec/lex.html#special-token-sequence 2614 1.1 mrg */ 2615 1.1 mrg bool parseSpecialTokenSequence() 2616 1.1 mrg { 2617 1.1 mrg Token n; 2618 1.1 mrg scan(&n); 2619 1.1 mrg if (n.value == TOK.identifier) 2620 1.1 mrg { 2621 1.1 mrg if (n.ident == Id.line) 2622 1.1 mrg { 2623 1.1 mrg poundLine(n, false); 2624 1.1 mrg return true; 2625 1.1 mrg } 2626 1.1 mrg else 2627 1.1 mrg { 2628 1.1 mrg const locx = loc(); 2629 1.1 mrg warning(locx, "C preprocessor directive `#%s` is not supported", n.ident.toChars()); 2630 1.1 mrg } 2631 1.1 mrg } 2632 1.1 mrg else if (n.value == TOK.if_) 2633 1.1 mrg { 2634 1.1 mrg error("C preprocessor directive `#if` is not supported, use `version` or `static if`"); 2635 1.1 mrg } 2636 1.1 mrg return false; 2637 1.1 mrg } 2638 1.1 mrg 2639 1.1 mrg /********************************************* 2640 1.1 mrg * Parse line/file preprocessor directive: 2641 1.1 mrg * #line linnum [filespec] 2642 1.1 mrg * Allow __LINE__ for linnum, and __FILE__ for filespec. 2643 1.1 mrg * Accept linemarker format: 2644 1.1 mrg * # linnum [filespec] {flags} 2645 1.1 mrg * There can be zero or more flags, which are one of the digits 1..4, and 2646 1.1 mrg * must be in ascending order. The flags are ignored. 
2647 1.1 mrg * Params: 2648 1.1 mrg * tok = token we're on, which is linnum of linemarker 2649 1.1 mrg * linemarker = true if line marker format and lexer is on linnum 2650 1.1 mrg * References: 2651 1.1 mrg * linemarker https://gcc.gnu.org/onlinedocs/gcc-11.1.0/cpp/Preprocessor-Output.html 2652 1.1 mrg */ 2653 1.1 mrg final void poundLine(ref Token tok, bool linemarker) 2654 1.1 mrg { 2655 1.1 mrg auto linnum = this.scanloc.linnum; 2656 1.1 mrg const(char)* filespec = null; 2657 1.1 mrg bool flags; 2658 1.1 mrg 2659 1.1 mrg if (!linemarker) 2660 1.1 mrg scan(&tok); 2661 1.1 mrg if (tok.value == TOK.int32Literal || tok.value == TOK.int64Literal) 2662 1.1 mrg { 2663 1.1 mrg const lin = cast(int)(tok.unsvalue); 2664 1.1 mrg if (lin != tok.unsvalue) 2665 1.1 mrg { 2666 1.1 mrg error(tok.loc, "line number `%lld` out of range", cast(ulong)tok.unsvalue); 2667 1.1 mrg skipToNextLine(); 2668 1.1 mrg return; 2669 1.1 mrg } 2670 1.1 mrg else 2671 1.1 mrg linnum = lin; 2672 1.1 mrg } 2673 1.1 mrg else if (tok.value == TOK.line) // #line __LINE__ 2674 1.1 mrg { 2675 1.1 mrg } 2676 1.1 mrg else 2677 1.1 mrg { 2678 1.1 mrg error(tok.loc, "positive integer argument expected following `#line`"); 2679 1.1 mrg if (tok.value != TOK.endOfLine) 2680 1.1 mrg skipToNextLine(); 2681 1.1 mrg return; 2682 1.1 mrg } 2683 1.1 mrg while (1) 2684 1.1 mrg { 2685 1.1 mrg scan(&tok); 2686 1.1 mrg switch (tok.value) 2687 1.1 mrg { 2688 1.1 mrg case TOK.endOfFile: 2689 1.1 mrg case TOK.endOfLine: 2690 1.1 mrg if (!inTokenStringConstant) 2691 1.1 mrg { 2692 1.1 mrg this.scanloc.linnum = linnum; 2693 1.1 mrg if (filespec) 2694 1.1 mrg this.scanloc.filename = filespec; 2695 1.1 mrg } 2696 1.1 mrg return; 2697 1.1 mrg case TOK.file: 2698 1.1 mrg if (filespec || flags) 2699 1.1 mrg goto Lerr; 2700 1.1 mrg filespec = mem.xstrdup(scanloc.filename); 2701 1.1 mrg continue; 2702 1.1 mrg case TOK.string_: 2703 1.1 mrg if (filespec || flags) 2704 1.1 mrg goto Lerr; 2705 1.1 mrg if (tok.ptr[0] != '"' || 
tok.postfix != 0) 2706 1.1 mrg goto Lerr; 2707 1.1 mrg filespec = tok.ustring; 2708 1.1 mrg continue; 2709 1.1 mrg case TOK.int32Literal: 2710 1.1 mrg if (!filespec) 2711 1.1 mrg goto Lerr; 2712 1.1 mrg if (linemarker && tok.unsvalue >= 1 && tok.unsvalue <= 4) 2713 1.1 mrg { 2714 1.1 mrg flags = true; // linemarker flags seen 2715 1.1 mrg continue; 2716 1.1 mrg } 2717 1.1 mrg goto Lerr; 2718 1.1 mrg default: 2719 1.1 mrg goto Lerr; 2720 1.1 mrg } 2721 1.1 mrg } 2722 1.1 mrg Lerr: 2723 1.1 mrg if (filespec is null) 2724 1.1 mrg error(tok.loc, "invalid filename for `#line` directive"); 2725 1.1 mrg else if (linemarker) 2726 1.1 mrg error(tok.loc, "invalid flag for line marker directive"); 2727 1.1 mrg else if (!Ccompile) 2728 1.1 mrg error(tok.loc, "found `%s` when expecting new line following `#line` directive", tok.toChars()); 2729 1.1 mrg if (tok.value != TOK.endOfLine) 2730 1.1 mrg skipToNextLine(); 2731 1.1 mrg } 2732 1.1 mrg 2733 1.1 mrg /*************************************** 2734 1.1 mrg * Scan forward to start of next line. 2735 1.1 mrg */ 2736 1.1 mrg final void skipToNextLine() 2737 1.1 mrg { 2738 1.1 mrg while (1) 2739 1.1 mrg { 2740 1.1 mrg switch (*p) 2741 1.1 mrg { 2742 1.1 mrg case 0: 2743 1.1 mrg case 0x1A: 2744 1.1 mrg return; // do not advance p 2745 1.1 mrg 2746 1.1 mrg case '\n': 2747 1.1 mrg ++p; 2748 1.1 mrg break; 2749 1.1 mrg 2750 1.1 mrg case '\r': 2751 1.1 mrg ++p; 2752 1.1 mrg if (p[0] == '\n') 2753 1.1 mrg ++p; 2754 1.1 mrg break; 2755 1.1 mrg 2756 1.1 mrg default: 2757 1.1 mrg if (*p & 0x80) 2758 1.1 mrg { 2759 1.1 mrg const u = decodeUTF(); 2760 1.1 mrg if (u == PS || u == LS) 2761 1.1 mrg { 2762 1.1 mrg ++p; 2763 1.1 mrg break; 2764 1.1 mrg } 2765 1.1 mrg } 2766 1.1 mrg ++p; 2767 1.1 mrg continue; 2768 1.1 mrg } 2769 1.1 mrg break; 2770 1.1 mrg } 2771 1.1 mrg endOfLine(); 2772 1.1 mrg tokenizeNewlines = false; 2773 1.1 mrg } 2774 1.1 mrg 2775 1.1 mrg /******************************************** 2776 1.1 mrg * Decode UTF character. 
2777 1.1 mrg * Issue error messages for invalid sequences. 2778 1.1 mrg * Return decoded character, advance p to last character in UTF sequence. 2779 1.1 mrg */ 2780 1.1 mrg private uint decodeUTF() 2781 1.1 mrg { 2782 1.1 mrg const s = p; 2783 1.1 mrg assert(*s & 0x80); 2784 1.1 mrg // Check length of remaining string up to 4 UTF-8 characters 2785 1.1 mrg size_t len; 2786 1.1 mrg for (len = 1; len < 4 && s[len]; len++) 2787 1.1 mrg { 2788 1.1 mrg } 2789 1.1 mrg size_t idx = 0; 2790 1.1 mrg dchar u; 2791 1.1 mrg const msg = utf_decodeChar(s[0 .. len], idx, u); 2792 1.1 mrg p += idx - 1; 2793 1.1 mrg if (msg) 2794 1.1 mrg { 2795 1.1 mrg error("%.*s", cast(int)msg.length, msg.ptr); 2796 1.1 mrg } 2797 1.1 mrg return u; 2798 1.1 mrg } 2799 1.1 mrg 2800 1.1 mrg /*************************************************** 2801 1.1 mrg * Parse doc comment embedded between t.ptr and p. 2802 1.1 mrg * Remove trailing blanks and tabs from lines. 2803 1.1 mrg * Replace all newlines with \n. 2804 1.1 mrg * Remove leading comment character from each line. 2805 1.1 mrg * Decide if it's a lineComment or a blockComment. 2806 1.1 mrg * Append to previous one for this token. 2807 1.1 mrg * 2808 1.1 mrg * If newParagraph is true, an extra newline will be 2809 1.1 mrg * added between adjoining doc comments. 
2810 1.1 mrg */ 2811 1.1 mrg private void getDocComment(Token* t, uint lineComment, bool newParagraph) pure 2812 1.1 mrg { 2813 1.1 mrg /* ct tells us which kind of comment it is: '/', '*', or '+' 2814 1.1 mrg */ 2815 1.1 mrg const ct = t.ptr[2]; 2816 1.1 mrg /* Start of comment text skips over / * *, / + +, or / / / 2817 1.1 mrg */ 2818 1.1 mrg const(char)* q = t.ptr + 3; // start of comment text 2819 1.1 mrg const(char)* qend = p; 2820 1.1 mrg if (ct == '*' || ct == '+') 2821 1.1 mrg qend -= 2; 2822 1.1 mrg /* Scan over initial row of ****'s or ++++'s or ////'s 2823 1.1 mrg */ 2824 1.1 mrg for (; q < qend; q++) 2825 1.1 mrg { 2826 1.1 mrg if (*q != ct) 2827 1.1 mrg break; 2828 1.1 mrg } 2829 1.1 mrg /* Remove leading spaces until start of the comment 2830 1.1 mrg */ 2831 1.1 mrg int linestart = 0; 2832 1.1 mrg if (ct == '/') 2833 1.1 mrg { 2834 1.1 mrg while (q < qend && (*q == ' ' || *q == '\t')) 2835 1.1 mrg ++q; 2836 1.1 mrg } 2837 1.1 mrg else if (q < qend) 2838 1.1 mrg { 2839 1.1 mrg if (*q == '\r') 2840 1.1 mrg { 2841 1.1 mrg ++q; 2842 1.1 mrg if (q < qend && *q == '\n') 2843 1.1 mrg ++q; 2844 1.1 mrg linestart = 1; 2845 1.1 mrg } 2846 1.1 mrg else if (*q == '\n') 2847 1.1 mrg { 2848 1.1 mrg ++q; 2849 1.1 mrg linestart = 1; 2850 1.1 mrg } 2851 1.1 mrg } 2852 1.1 mrg /* Remove trailing row of ****'s or ++++'s 2853 1.1 mrg */ 2854 1.1 mrg if (ct != '/') 2855 1.1 mrg { 2856 1.1 mrg for (; q < qend; qend--) 2857 1.1 mrg { 2858 1.1 mrg if (qend[-1] != ct) 2859 1.1 mrg break; 2860 1.1 mrg } 2861 1.1 mrg } 2862 1.1 mrg /* Comment is now [q .. qend]. 2863 1.1 mrg * Canonicalize it into buf[]. 
2864 1.1 mrg */ 2865 1.1 mrg OutBuffer buf; 2866 1.1 mrg 2867 1.1 mrg void trimTrailingWhitespace() 2868 1.1 mrg { 2869 1.1 mrg const s = buf[]; 2870 1.1 mrg auto len = s.length; 2871 1.1 mrg while (len && (s[len - 1] == ' ' || s[len - 1] == '\t')) 2872 1.1 mrg --len; 2873 1.1 mrg buf.setsize(len); 2874 1.1 mrg } 2875 1.1 mrg 2876 1.1 mrg for (; q < qend; q++) 2877 1.1 mrg { 2878 1.1 mrg char c = *q; 2879 1.1 mrg switch (c) 2880 1.1 mrg { 2881 1.1 mrg case '*': 2882 1.1 mrg case '+': 2883 1.1 mrg if (linestart && c == ct) 2884 1.1 mrg { 2885 1.1 mrg linestart = 0; 2886 1.1 mrg /* Trim preceding whitespace up to preceding \n 2887 1.1 mrg */ 2888 1.1 mrg trimTrailingWhitespace(); 2889 1.1 mrg continue; 2890 1.1 mrg } 2891 1.1 mrg break; 2892 1.1 mrg case ' ': 2893 1.1 mrg case '\t': 2894 1.1 mrg break; 2895 1.1 mrg case '\r': 2896 1.1 mrg if (q[1] == '\n') 2897 1.1 mrg continue; // skip the \r 2898 1.1 mrg goto Lnewline; 2899 1.1 mrg default: 2900 1.1 mrg if (c == 226) 2901 1.1 mrg { 2902 1.1 mrg // If LS or PS 2903 1.1 mrg if (q[1] == 128 && (q[2] == 168 || q[2] == 169)) 2904 1.1 mrg { 2905 1.1 mrg q += 2; 2906 1.1 mrg goto Lnewline; 2907 1.1 mrg } 2908 1.1 mrg } 2909 1.1 mrg linestart = 0; 2910 1.1 mrg break; 2911 1.1 mrg Lnewline: 2912 1.1 mrg c = '\n'; // replace all newlines with \n 2913 1.1 mrg goto case; 2914 1.1 mrg case '\n': 2915 1.1 mrg linestart = 1; 2916 1.1 mrg /* Trim trailing whitespace 2917 1.1 mrg */ 2918 1.1 mrg trimTrailingWhitespace(); 2919 1.1 mrg break; 2920 1.1 mrg } 2921 1.1 mrg buf.writeByte(c); 2922 1.1 mrg } 2923 1.1 mrg /* Trim trailing whitespace (if the last line does not have newline) 2924 1.1 mrg */ 2925 1.1 mrg trimTrailingWhitespace(); 2926 1.1 mrg 2927 1.1 mrg // Always end with a newline 2928 1.1 mrg const s = buf[]; 2929 1.1 mrg if (s.length == 0 || s[$ - 1] != '\n') 2930 1.1 mrg buf.writeByte('\n'); 2931 1.1 mrg 2932 1.1 mrg // It's a line comment if the start of the doc comment comes 2933 1.1 mrg // after other non-whitespace 
on the same line. 2934 1.1 mrg auto dc = (lineComment && anyToken) ? &t.lineComment : &t.blockComment; 2935 1.1 mrg // Combine with previous doc comment, if any 2936 1.1 mrg if (*dc) 2937 1.1 mrg *dc = combineComments(*dc, buf[], newParagraph).toDString(); 2938 1.1 mrg else 2939 1.1 mrg *dc = buf.extractSlice(true); 2940 1.1 mrg } 2941 1.1 mrg 2942 1.1 mrg /******************************************** 2943 1.1 mrg * Combine two document comments into one, 2944 1.1 mrg * separated by an extra newline if newParagraph is true. 2945 1.1 mrg */ 2946 1.1 mrg static const(char)* combineComments(const(char)[] c1, const(char)[] c2, bool newParagraph) pure 2947 1.1 mrg { 2948 1.1 mrg //debug printf("Lexer::combineComments('%*.s', '%*.s', '%i')\n", cast(int) c1.length, c1.ptr, cast(int) c2.length, c2.ptr, newParagraph); 2949 1.1 mrg const(int) newParagraphSize = newParagraph ? 1 : 0; // Size of the combining '\n' 2950 1.1 mrg if (!c1) 2951 1.1 mrg return c2.ptr; 2952 1.1 mrg if (!c2) 2953 1.1 mrg return c1.ptr; 2954 1.1 mrg 2955 1.1 mrg int insertNewLine = 0; 2956 1.1 mrg if (c1.length && c1[$ - 1] != '\n') 2957 1.1 mrg insertNewLine = 1; 2958 1.1 mrg const retSize = c1.length + insertNewLine + newParagraphSize + c2.length; 2959 1.1 mrg auto p = cast(char*)mem.xmalloc_noscan(retSize + 1); 2960 1.1 mrg p[0 .. c1.length] = c1[]; 2961 1.1 mrg if (insertNewLine) 2962 1.1 mrg p[c1.length] = '\n'; 2963 1.1 mrg if (newParagraph) 2964 1.1 mrg p[c1.length + insertNewLine] = '\n'; 2965 1.1 mrg p[retSize - c2.length .. 
retSize] = c2[]; 2966 1.1 mrg p[retSize] = 0; 2967 1.1 mrg return p; 2968 1.1 mrg } 2969 1.1 mrg 2970 1.1 mrg /************************** 2971 1.1 mrg * `p` should be at start of next line 2972 1.1 mrg */ 2973 1.1 mrg private void endOfLine() pure @nogc @safe 2974 1.1 mrg { 2975 1.1 mrg scanloc.linnum++; 2976 1.1 mrg line = p; 2977 1.1 mrg } 2978 1.1 mrg } 2979 1.1 mrg 2980 1.1 mrg 2981 1.1 mrg /******************************* Private *****************************************/ 2982 1.1 mrg 2983 1.1 mrg private: 2984 1.1 mrg 2985 1.1 mrg /// Support for `__DATE__`, `__TIME__`, and `__TIMESTAMP__` 2986 1.1 mrg private struct TimeStampInfo 2987 1.1 mrg { 2988 1.1 mrg private __gshared bool initdone = false; 2989 1.1 mrg 2990 1.1 mrg // Note: Those properties need to be guarded by a call to `init` 2991 1.1 mrg // The API isn't safe, and quite brittle, but it was left this way 2992 1.1 mrg // over performance concerns. 2993 1.1 mrg // This is currently only called once, from the lexer. 2994 1.1 mrg __gshared char[11 + 1] date; 2995 1.1 mrg __gshared char[8 + 1] time; 2996 1.1 mrg __gshared char[24 + 1] timestamp; 2997 1.1 mrg 2998 1.1 mrg public static void initialize(const ref Loc loc) nothrow 2999 1.1 mrg { 3000 1.1 mrg if (initdone) 3001 1.1 mrg return; 3002 1.1 mrg 3003 1.1 mrg initdone = true; 3004 1.1 mrg time_t ct; 3005 1.1 mrg // https://issues.dlang.org/show_bug.cgi?id=20444 3006 1.1 mrg if (auto p = getenv("SOURCE_DATE_EPOCH")) 3007 1.1 mrg { 3008 1.1 mrg if (!ct.parseDigits(p.toDString())) 3009 1.1 mrg error(loc, "value of environment variable `SOURCE_DATE_EPOCH` should be a valid UNIX timestamp, not: `%s`", p); 3010 1.1 mrg } 3011 1.1 mrg else 3012 1.1 mrg .time(&ct); 3013 1.1 mrg const p = ctime(&ct); 3014 1.1 mrg assert(p); 3015 1.1 mrg sprintf(&date[0], "%.6s %.4s", p + 4, p + 20); 3016 1.1 mrg sprintf(&time[0], "%.8s", p + 11); 3017 1.1 mrg sprintf(×tamp[0], "%.24s", p); 3018 1.1 mrg } 3019 1.1 mrg } 3020 1.1 mrg 3021 1.1 mrg private enum LS = 0x2028; // 
UTF line separator 3022 1.1 mrg private enum PS = 0x2029; // UTF paragraph separator 3023 1.1 mrg 3024 1.1 mrg /******************************************** 3025 1.1 mrg * Do our own char maps 3026 1.1 mrg */ 3027 1.1 mrg private static immutable cmtable = () 3028 1.1 mrg { 3029 1.1 mrg ubyte[256] table; 3030 1.1 mrg foreach (const c; 0 .. table.length) 3031 1.1 mrg { 3032 1.1 mrg if ('0' <= c && c <= '7') 3033 1.1 mrg table[c] |= CMoctal; 3034 1.1 mrg if (c_isxdigit(c)) 3035 1.1 mrg table[c] |= CMhex; 3036 1.1 mrg if (c_isalnum(c) || c == '_') 3037 1.1 mrg table[c] |= CMidchar; 3038 1.1 mrg 3039 1.1 mrg switch (c) 3040 1.1 mrg { 3041 1.1 mrg case 'x': case 'X': 3042 1.1 mrg case 'b': case 'B': 3043 1.1 mrg table[c] |= CMzerosecond; 3044 1.1 mrg break; 3045 1.1 mrg 3046 1.1 mrg case '0': .. case '9': 3047 1.1 mrg case 'e': case 'E': 3048 1.1 mrg case 'f': case 'F': 3049 1.1 mrg case 'l': case 'L': 3050 1.1 mrg case 'p': case 'P': 3051 1.1 mrg case 'u': case 'U': 3052 1.1 mrg case 'i': 3053 1.1 mrg case '.': 3054 1.1 mrg case '_': 3055 1.1 mrg table[c] |= CMzerosecond | CMdigitsecond; 3056 1.1 mrg break; 3057 1.1 mrg 3058 1.1 mrg default: 3059 1.1 mrg break; 3060 1.1 mrg } 3061 1.1 mrg 3062 1.1 mrg switch (c) 3063 1.1 mrg { 3064 1.1 mrg case '\\': 3065 1.1 mrg case '\n': 3066 1.1 mrg case '\r': 3067 1.1 mrg case 0: 3068 1.1 mrg case 0x1A: 3069 1.1 mrg case '\'': 3070 1.1 mrg break; 3071 1.1 mrg default: 3072 1.1 mrg if (!(c & 0x80)) 3073 1.1 mrg table[c] |= CMsinglechar; 3074 1.1 mrg break; 3075 1.1 mrg } 3076 1.1 mrg } 3077 1.1 mrg return table; 3078 1.1 mrg }(); 3079 1.1 mrg 3080 1.1 mrg private 3081 1.1 mrg { 3082 1.1 mrg enum CMoctal = 0x1; 3083 1.1 mrg enum CMhex = 0x2; 3084 1.1 mrg enum CMidchar = 0x4; 3085 1.1 mrg enum CMzerosecond = 0x8; 3086 1.1 mrg enum CMdigitsecond = 0x10; 3087 1.1 mrg enum CMsinglechar = 0x20; 3088 1.1 mrg } 3089 1.1 mrg 3090 1.1 mrg private bool isoctal(const char c) pure @nogc @safe 3091 1.1 mrg { 3092 1.1 mrg return (cmtable[c] & 
CMoctal) != 0; 3093 1.1 mrg } 3094 1.1 mrg 3095 1.1 mrg private bool ishex(const char c) pure @nogc @safe 3096 1.1 mrg { 3097 1.1 mrg return (cmtable[c] & CMhex) != 0; 3098 1.1 mrg } 3099 1.1 mrg 3100 1.1 mrg private bool isidchar(const char c) pure @nogc @safe 3101 1.1 mrg { 3102 1.1 mrg return (cmtable[c] & CMidchar) != 0; 3103 1.1 mrg } 3104 1.1 mrg 3105 1.1 mrg private bool isZeroSecond(const char c) pure @nogc @safe 3106 1.1 mrg { 3107 1.1 mrg return (cmtable[c] & CMzerosecond) != 0; 3108 1.1 mrg } 3109 1.1 mrg 3110 1.1 mrg private bool isDigitSecond(const char c) pure @nogc @safe 3111 1.1 mrg { 3112 1.1 mrg return (cmtable[c] & CMdigitsecond) != 0; 3113 1.1 mrg } 3114 1.1 mrg 3115 1.1 mrg private bool issinglechar(const char c) pure @nogc @safe 3116 1.1 mrg { 3117 1.1 mrg return (cmtable[c] & CMsinglechar) != 0; 3118 1.1 mrg } 3119 1.1 mrg 3120 1.1 mrg private bool c_isxdigit(const int c) pure @nogc @safe 3121 1.1 mrg { 3122 1.1 mrg return (( c >= '0' && c <= '9') || 3123 1.1 mrg ( c >= 'a' && c <= 'f') || 3124 1.1 mrg ( c >= 'A' && c <= 'F')); 3125 1.1 mrg } 3126 1.1 mrg 3127 1.1 mrg private bool c_isalnum(const int c) pure @nogc @safe 3128 1.1 mrg { 3129 1.1 mrg return (( c >= '0' && c <= '9') || 3130 1.1 mrg ( c >= 'a' && c <= 'z') || 3131 1.1 mrg ( c >= 'A' && c <= 'Z')); 3132 1.1 mrg } 3133 1.1 mrg 3134 1.1 mrg /******************************* Unittest *****************************************/ 3135 1.1 mrg 3136 1.1 mrg unittest 3137 1.1 mrg { 3138 1.1 mrg import dmd.console; 3139 1.1 mrg nothrow bool assertDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header, 3140 1.1 mrg const(char)* format, va_list ap, const(char)* p1, const(char)* p2) 3141 1.1 mrg { 3142 1.1 mrg assert(0); 3143 1.1 mrg } 3144 1.1 mrg diagnosticHandler = &assertDiagnosticHandler; 3145 1.1 mrg 3146 1.1 mrg static void test(T)(string sequence, T expected, bool Ccompile = false) 3147 1.1 mrg { 3148 1.1 mrg auto p = cast(const(char)*)sequence.ptr; 3149 1.1 mrg 
assert(expected == Lexer.escapeSequence(Loc.initial, p, Ccompile)); 3150 1.1 mrg assert(p == sequence.ptr + sequence.length); 3151 1.1 mrg } 3152 1.1 mrg 3153 1.1 mrg test(`'`, '\''); 3154 1.1 mrg test(`"`, '"'); 3155 1.1 mrg test(`?`, '?'); 3156 1.1 mrg test(`\`, '\\'); 3157 1.1 mrg test(`0`, '\0'); 3158 1.1 mrg test(`a`, '\a'); 3159 1.1 mrg test(`b`, '\b'); 3160 1.1 mrg test(`f`, '\f'); 3161 1.1 mrg test(`n`, '\n'); 3162 1.1 mrg test(`r`, '\r'); 3163 1.1 mrg test(`t`, '\t'); 3164 1.1 mrg test(`v`, '\v'); 3165 1.1 mrg 3166 1.1 mrg test(`x00`, 0x00); 3167 1.1 mrg test(`xff`, 0xff); 3168 1.1 mrg test(`xFF`, 0xff); 3169 1.1 mrg test(`xa7`, 0xa7); 3170 1.1 mrg test(`x3c`, 0x3c); 3171 1.1 mrg test(`xe2`, 0xe2); 3172 1.1 mrg 3173 1.1 mrg test(`1`, '\1'); 3174 1.1 mrg test(`42`, '\42'); 3175 1.1 mrg test(`357`, '\357'); 3176 1.1 mrg 3177 1.1 mrg test(`u1234`, '\u1234'); 3178 1.1 mrg test(`uf0e4`, '\uf0e4'); 3179 1.1 mrg 3180 1.1 mrg test(`U0001f603`, '\U0001f603'); 3181 1.1 mrg 3182 1.1 mrg test(`"`, '"'); 3183 1.1 mrg test(`<`, '<'); 3184 1.1 mrg test(`>`, '>'); 3185 1.1 mrg 3186 1.1 mrg diagnosticHandler = null; 3187 1.1 mrg } 3188 1.1 mrg 3189 1.1 mrg unittest 3190 1.1 mrg { 3191 1.1 mrg import dmd.console; 3192 1.1 mrg string expected; 3193 1.1 mrg bool gotError; 3194 1.1 mrg 3195 1.1 mrg nothrow bool expectDiagnosticHandler(const ref Loc loc, Color headerColor, const(char)* header, 3196 1.1 mrg const(char)* format, va_list ap, const(char)* p1, const(char)* p2) 3197 1.1 mrg { 3198 1.1 mrg assert(cast(Classification)headerColor == Classification.error); 3199 1.1 mrg 3200 1.1 mrg gotError = true; 3201 1.1 mrg char[100] buffer = void; 3202 1.1 mrg auto actual = buffer[0 .. 
vsprintf(buffer.ptr, format, ap)]; 3203 1.1 mrg assert(expected == actual); 3204 1.1 mrg return true; 3205 1.1 mrg } 3206 1.1 mrg 3207 1.1 mrg diagnosticHandler = &expectDiagnosticHandler; 3208 1.1 mrg 3209 1.1 mrg void test(string sequence, string expectedError, dchar expectedReturnValue, uint expectedScanLength, bool Ccompile = false) 3210 1.1 mrg { 3211 1.1 mrg uint errors = global.errors; 3212 1.1 mrg gotError = false; 3213 1.1 mrg expected = expectedError; 3214 1.1 mrg auto p = cast(const(char)*)sequence.ptr; 3215 1.1 mrg auto actualReturnValue = Lexer.escapeSequence(Loc.initial, p, Ccompile); 3216 1.1 mrg assert(gotError); 3217 1.1 mrg assert(expectedReturnValue == actualReturnValue); 3218 1.1 mrg 3219 1.1 mrg auto actualScanLength = p - sequence.ptr; 3220 1.1 mrg assert(expectedScanLength == actualScanLength); 3221 1.1 mrg global.errors = errors; 3222 1.1 mrg } 3223 1.1 mrg 3224 1.1 mrg test("c", `undefined escape sequence \c`, 'c', 1); 3225 1.1 mrg test("!", `undefined escape sequence \!`, '!', 1); 3226 1.1 mrg test(""", `undefined escape sequence \&`, '&', 1, true); 3227 1.1 mrg 3228 1.1 mrg test("x1", `escape hex sequence has 1 hex digits instead of 2`, '\x01', 2); 3229 1.1 mrg 3230 1.1 mrg test("u1" , `escape hex sequence has 1 hex digits instead of 4`, 0x1, 2); 3231 1.1 mrg test("u12" , `escape hex sequence has 2 hex digits instead of 4`, 0x12, 3); 3232 1.1 mrg test("u123", `escape hex sequence has 3 hex digits instead of 4`, 0x123, 4); 3233 1.1 mrg 3234 1.1 mrg test("U0" , `escape hex sequence has 1 hex digits instead of 8`, 0x0, 2); 3235 1.1 mrg test("U00" , `escape hex sequence has 2 hex digits instead of 8`, 0x00, 3); 3236 1.1 mrg test("U000" , `escape hex sequence has 3 hex digits instead of 8`, 0x000, 4); 3237 1.1 mrg test("U0000" , `escape hex sequence has 4 hex digits instead of 8`, 0x0000, 5); 3238 1.1 mrg test("U0001f" , `escape hex sequence has 5 hex digits instead of 8`, 0x0001f, 6); 3239 1.1 mrg test("U0001f6" , `escape hex sequence has 6 
hex digits instead of 8`, 0x0001f6, 7); 3240 1.1 mrg test("U0001f60", `escape hex sequence has 7 hex digits instead of 8`, 0x0001f60, 8); 3241 1.1 mrg 3242 1.1 mrg test("ud800" , `invalid UTF character \U0000d800`, '?', 5); 3243 1.1 mrg test("udfff" , `invalid UTF character \U0000dfff`, '?', 5); 3244 1.1 mrg test("U00110000", `invalid UTF character \U00110000`, '?', 9); 3245 1.1 mrg 3246 1.1 mrg test("xg0" , `undefined escape hex sequence \xg`, 'g', 2); 3247 1.1 mrg test("ug000" , `undefined escape hex sequence \ug`, 'g', 2); 3248 1.1 mrg test("Ug0000000", `undefined escape hex sequence \Ug`, 'g', 2); 3249 1.1 mrg 3250 1.1 mrg test("&BAD;", `unnamed character entity &BAD;` , '?', 5); 3251 1.1 mrg test(""", `unterminated named entity "`, '?', 5); 3252 1.1 mrg test(""", `unterminated named entity "`, '?', 5); 3253 1.1 mrg 3254 1.1 mrg test("400", `escape octal sequence \400 is larger than \377`, 0x100, 3); 3255 1.1 mrg 3256 1.1 mrg diagnosticHandler = null; 3257 1.1 mrg } 3258 1.1 mrg 3259 1.1 mrg unittest 3260 1.1 mrg { 3261 1.1 mrg //printf("lexer.unittest\n"); 3262 1.1 mrg /* Not much here, just trying things out. 3263 1.1 mrg */ 3264 1.1 mrg string text = "int"; // We rely on the implicit null-terminator 3265 1.1 mrg scope Lexer lex1 = new Lexer(null, text.ptr, 0, text.length, 0, 0); 3266 1.1 mrg TOK tok; 3267 1.1 mrg tok = lex1.nextToken(); 3268 1.1 mrg //printf("tok == %s, %d, %d\n", Token::toChars(tok), tok, TOK.int32); 3269 1.1 mrg assert(tok == TOK.int32); 3270 1.1 mrg tok = lex1.nextToken(); 3271 1.1 mrg assert(tok == TOK.endOfFile); 3272 1.1 mrg tok = lex1.nextToken(); 3273 1.1 mrg assert(tok == TOK.endOfFile); 3274 1.1 mrg tok = lex1.nextToken(); 3275 1.1 mrg assert(tok == TOK.endOfFile); 3276 1.1 mrg } 3277 1.1 mrg 3278 1.1 mrg unittest 3279 1.1 mrg { 3280 1.1 mrg // We don't want to see Lexer error output during these tests. 
3281 1.1 mrg uint errors = global.startGagging(); 3282 1.1 mrg scope(exit) global.endGagging(errors); 3283 1.1 mrg 3284 1.1 mrg // Test malformed input: even malformed input should end in a TOK.endOfFile. 3285 1.1 mrg static immutable char[][] testcases = 3286 1.1 mrg [ // Testcase must end with 0 or 0x1A. 3287 1.1 mrg [0], // not malformed, but pathological 3288 1.1 mrg ['\'', 0], 3289 1.1 mrg ['\'', 0x1A], 3290 1.1 mrg ['{', '{', 'q', '{', 0], 3291 1.1 mrg [0xFF, 0], 3292 1.1 mrg [0xFF, 0x80, 0], 3293 1.1 mrg [0xFF, 0xFF, 0], 3294 1.1 mrg [0xFF, 0xFF, 0], 3295 1.1 mrg ['x', '"', 0x1A], 3296 1.1 mrg ]; 3297 1.1 mrg 3298 1.1 mrg foreach (testcase; testcases) 3299 1.1 mrg { 3300 1.1 mrg scope Lexer lex2 = new Lexer(null, testcase.ptr, 0, testcase.length-1, 0, 0); 3301 1.1 mrg TOK tok = lex2.nextToken(); 3302 1.1 mrg size_t iterations = 1; 3303 1.1 mrg while ((tok != TOK.endOfFile) && (iterations++ < testcase.length)) 3304 1.1 mrg { 3305 1.1 mrg tok = lex2.nextToken(); 3306 1.1 mrg } 3307 1.1 mrg assert(tok == TOK.endOfFile); 3308 1.1 mrg tok = lex2.nextToken(); 3309 1.1 mrg assert(tok == TOK.endOfFile); 3310 1.1 mrg } 3311 1.1 mrg } 3312