Home | History | Annotate | Line # | Download | only in libcpp
lex.cc revision 1.1.1.3
      1      1.1  mrg /* CPP Library - lexical analysis.
      2  1.1.1.3  mrg    Copyright (C) 2000-2024 Free Software Foundation, Inc.
      3      1.1  mrg    Contributed by Per Bothner, 1994-95.
      4      1.1  mrg    Based on CCCP program by Paul Rubin, June 1986
      5      1.1  mrg    Adapted to ANSI C, Richard Stallman, Jan 1987
      6      1.1  mrg    Broken out to separate file, Zack Weinberg, Mar 2000
      7      1.1  mrg 
      8      1.1  mrg This program is free software; you can redistribute it and/or modify it
      9      1.1  mrg under the terms of the GNU General Public License as published by the
     10      1.1  mrg Free Software Foundation; either version 3, or (at your option) any
     11      1.1  mrg later version.
     12      1.1  mrg 
     13      1.1  mrg This program is distributed in the hope that it will be useful,
     14      1.1  mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
     15      1.1  mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16      1.1  mrg GNU General Public License for more details.
     17      1.1  mrg 
     18      1.1  mrg You should have received a copy of the GNU General Public License
     19      1.1  mrg along with this program; see the file COPYING3.  If not see
     20      1.1  mrg <http://www.gnu.org/licenses/>.  */
     21      1.1  mrg 
     22      1.1  mrg #include "config.h"
     23      1.1  mrg #include "system.h"
     24      1.1  mrg #include "cpplib.h"
     25      1.1  mrg #include "internal.h"
     26      1.1  mrg 
/* Spelling category of a token type.  The enumerator names are formed
   by token pasting in the TK table macro, so the SPELL_ prefix and the
   suffixes must stay exactly as they are.  */
enum spell_type
{
  SPELL_OPERATOR = 0,   /* Category given to every OP table entry.  */
  SPELL_IDENT,
  SPELL_LITERAL,
  SPELL_NONE
};
     34      1.1  mrg 
     35      1.1  mrg struct token_spelling
     36      1.1  mrg {
     37      1.1  mrg   enum spell_type category;
     38      1.1  mrg   const unsigned char *name;
     39      1.1  mrg };
     40      1.1  mrg 
     41      1.1  mrg static const unsigned char *const digraph_spellings[] =
     42      1.1  mrg { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
     43      1.1  mrg 
     44      1.1  mrg #define OP(e, s) { SPELL_OPERATOR, UC s  },
     45      1.1  mrg #define TK(e, s) { SPELL_ ## s,    UC #e },
     46      1.1  mrg static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
     47      1.1  mrg #undef OP
     48      1.1  mrg #undef TK
     49      1.1  mrg 
     50      1.1  mrg #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
     51      1.1  mrg #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
     52      1.1  mrg 
     53  1.1.1.3  mrg /* ISO 10646 defines the UCS codespace as the range 0-0x10FFFF inclusive.  */
     54  1.1.1.3  mrg #define UCS_LIMIT 0x10FFFF
     55  1.1.1.3  mrg 
     56      1.1  mrg static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
     57      1.1  mrg static int skip_line_comment (cpp_reader *);
     58      1.1  mrg static void skip_whitespace (cpp_reader *, cppchar_t);
     59      1.1  mrg static void lex_string (cpp_reader *, cpp_token *, const uchar *);
     60      1.1  mrg static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
     61      1.1  mrg static void store_comment (cpp_reader *, cpp_token *);
     62      1.1  mrg static void create_literal (cpp_reader *, cpp_token *, const uchar *,
     63      1.1  mrg 			    unsigned int, enum cpp_ttype);
     64      1.1  mrg static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
     65      1.1  mrg static int name_p (cpp_reader *, const cpp_string *);
     66      1.1  mrg static tokenrun *next_tokenrun (tokenrun *);
     67      1.1  mrg 
     68      1.1  mrg static _cpp_buff *new_buff (size_t);
     69      1.1  mrg 
     70      1.1  mrg 
     71      1.1  mrg /* Utility routine:
     72      1.1  mrg 
     73      1.1  mrg    Compares, the token TOKEN to the NUL-terminated string STRING.
     74      1.1  mrg    TOKEN must be a CPP_NAME.  Returns 1 for equal, 0 for unequal.  */
     75      1.1  mrg int
     76      1.1  mrg cpp_ideq (const cpp_token *token, const char *string)
     77      1.1  mrg {
     78      1.1  mrg   if (token->type != CPP_NAME)
     79      1.1  mrg     return 0;
     80      1.1  mrg 
     81      1.1  mrg   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
     82      1.1  mrg }
     83      1.1  mrg 
     84      1.1  mrg /* Record a note TYPE at byte POS into the current cleaned logical
     85      1.1  mrg    line.  */
     86      1.1  mrg static void
     87      1.1  mrg add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
     88      1.1  mrg {
     89      1.1  mrg   if (buffer->notes_used == buffer->notes_cap)
     90      1.1  mrg     {
     91      1.1  mrg       buffer->notes_cap = buffer->notes_cap * 2 + 200;
     92      1.1  mrg       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
     93      1.1  mrg                                   buffer->notes_cap);
     94      1.1  mrg     }
     95      1.1  mrg 
     96      1.1  mrg   buffer->notes[buffer->notes_used].pos = pos;
     97      1.1  mrg   buffer->notes[buffer->notes_used].type = type;
     98      1.1  mrg   buffer->notes_used++;
     99      1.1  mrg }
    100      1.1  mrg 
    101      1.1  mrg 
    102      1.1  mrg /* Fast path to find line special characters using optimized character
    104      1.1  mrg    scanning algorithms.  Anything complicated falls back to the slow
    105      1.1  mrg    path below.  Since this loop is very hot it's worth doing these kinds
    106      1.1  mrg    of optimizations.
    107      1.1  mrg 
    108      1.1  mrg    One of the paths through the ifdefs should provide
    109      1.1  mrg 
    110      1.1  mrg      const uchar *search_line_fast (const uchar *s, const uchar *end);
    111      1.1  mrg 
    112      1.1  mrg    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
    113      1.1  mrg    the found character.
    114      1.1  mrg 
    115      1.1  mrg    Note that the last character of the buffer is *always* a newline,
    116      1.1  mrg    as forced by _cpp_convert_input.  This fact can be used to avoid
    117      1.1  mrg    explicitly looking for the end of the buffer.  */
    118      1.1  mrg 
    119      1.1  mrg /* Configure gives us an ifdef test.  */
    120      1.1  mrg #ifndef WORDS_BIGENDIAN
    121      1.1  mrg #define WORDS_BIGENDIAN 0
    122      1.1  mrg #endif
    123      1.1  mrg 
    124      1.1  mrg /* We'd like the largest integer that fits into a register.  There's nothing
    125      1.1  mrg    in <stdint.h> that gives us that.  For most hosts this is unsigned long,
    126      1.1  mrg    but MS decided on an LLP64 model.  Thankfully when building with GCC we
    127      1.1  mrg    can get the "real" word size.  */
    128      1.1  mrg #ifdef __GNUC__
    129      1.1  mrg typedef unsigned int word_type __attribute__((__mode__(__word__)));
    130      1.1  mrg #else
    131      1.1  mrg typedef unsigned long word_type;
    132      1.1  mrg #endif
    133      1.1  mrg 
    134      1.1  mrg /* The code below is only expecting sizes 4 or 8.
    135      1.1  mrg    Die at compile-time if this expectation is violated.  */
    136      1.1  mrg typedef char check_word_type_size
    137      1.1  mrg   [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
    138      1.1  mrg 
    139      1.1  mrg /* Return X with the first N bytes forced to values that won't match one
    140      1.1  mrg    of the interesting characters.  Note that NUL is not interesting.  */
    141      1.1  mrg 
    142      1.1  mrg static inline word_type
    143      1.1  mrg acc_char_mask_misalign (word_type val, unsigned int n)
    144      1.1  mrg {
    145      1.1  mrg   word_type mask = -1;
    146      1.1  mrg   if (WORDS_BIGENDIAN)
    147      1.1  mrg     mask >>= n * 8;
    148      1.1  mrg   else
    149      1.1  mrg     mask <<= n * 8;
    150      1.1  mrg   return val & mask;
    151      1.1  mrg }
    152      1.1  mrg 
    153      1.1  mrg /* Return X replicated to all byte positions within WORD_TYPE.  */
    154      1.1  mrg 
    155      1.1  mrg static inline word_type
    156      1.1  mrg acc_char_replicate (uchar x)
    157      1.1  mrg {
    158      1.1  mrg   word_type ret;
    159      1.1  mrg 
    160      1.1  mrg   ret = (x << 24) | (x << 16) | (x << 8) | x;
    161      1.1  mrg   if (sizeof(word_type) == 8)
    162      1.1  mrg     ret = (ret << 16 << 16) | ret;
    163      1.1  mrg   return ret;
    164      1.1  mrg }
    165      1.1  mrg 
    166      1.1  mrg /* Return non-zero if some byte of VAL is (probably) C.  */
    167      1.1  mrg 
    168      1.1  mrg static inline word_type
    169      1.1  mrg acc_char_cmp (word_type val, word_type c)
    170      1.1  mrg {
    171      1.1  mrg #if defined(__GNUC__) && defined(__alpha__)
    172      1.1  mrg   /* We can get exact results using a compare-bytes instruction.
    173      1.1  mrg      Get (val == c) via (0 >= (val ^ c)).  */
    174      1.1  mrg   return __builtin_alpha_cmpbge (0, val ^ c);
    175      1.1  mrg #else
    176      1.1  mrg   word_type magic = 0x7efefefeU;
    177      1.1  mrg   if (sizeof(word_type) == 8)
    178      1.1  mrg     magic = (magic << 16 << 16) | 0xfefefefeU;
    179      1.1  mrg   magic |= 1;
    180      1.1  mrg 
    181      1.1  mrg   val ^= c;
    182      1.1  mrg   return ((val + magic) ^ ~val) & ~magic;
    183      1.1  mrg #endif
    184      1.1  mrg }
    185      1.1  mrg 
    186      1.1  mrg /* Given the result of acc_char_cmp is non-zero, return the index of
    187      1.1  mrg    the found character.  If this was a false positive, return -1.  */
    188      1.1  mrg 
    189      1.1  mrg static inline int
    190      1.1  mrg acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
    191      1.1  mrg 		word_type val ATTRIBUTE_UNUSED)
    192      1.1  mrg {
    193      1.1  mrg #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
    194      1.1  mrg   /* The cmpbge instruction sets *bits* of the result corresponding to
    195      1.1  mrg      matches in the bytes with no false positives.  */
    196      1.1  mrg   return __builtin_ctzl (cmp);
    197      1.1  mrg #else
    198      1.1  mrg   unsigned int i;
    199      1.1  mrg 
    200      1.1  mrg   /* ??? It would be nice to force unrolling here,
    201      1.1  mrg      and have all of these constants folded.  */
    202      1.1  mrg   for (i = 0; i < sizeof(word_type); ++i)
    203      1.1  mrg     {
    204      1.1  mrg       uchar c;
    205      1.1  mrg       if (WORDS_BIGENDIAN)
    206      1.1  mrg 	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
    207      1.1  mrg       else
    208      1.1  mrg 	c = (val >> i * 8) & 0xff;
    209      1.1  mrg 
    210      1.1  mrg       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
    211      1.1  mrg 	return i;
    212      1.1  mrg     }
    213      1.1  mrg 
    214      1.1  mrg   return -1;
    215      1.1  mrg #endif
    216      1.1  mrg }
    217      1.1  mrg 
    218      1.1  mrg /* A version of the fast scanner using bit fiddling techniques.
    219      1.1  mrg 
    220      1.1  mrg    For 32-bit words, one would normally perform 16 comparisons and
    221      1.1  mrg    16 branches.  With this algorithm one performs 24 arithmetic
    222      1.1  mrg    operations and one branch.  Whether this is faster with a 32-bit
    223      1.1  mrg    word size is going to be somewhat system dependent.
    224      1.1  mrg 
    225      1.1  mrg    For 64-bit words, we eliminate twice the number of comparisons
    226      1.1  mrg    and branches without increasing the number of arithmetic operations.
    227      1.1  mrg    It's almost certainly going to be a win with 64-bit word size.  */
    228      1.1  mrg 
    229      1.1  mrg static const uchar * search_line_acc_char (const uchar *, const uchar *)
    230      1.1  mrg   ATTRIBUTE_UNUSED;
    231      1.1  mrg 
    232      1.1  mrg static const uchar *
    233      1.1  mrg search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    234      1.1  mrg {
    235      1.1  mrg   const word_type repl_nl = acc_char_replicate ('\n');
    236      1.1  mrg   const word_type repl_cr = acc_char_replicate ('\r');
    237      1.1  mrg   const word_type repl_bs = acc_char_replicate ('\\');
    238      1.1  mrg   const word_type repl_qm = acc_char_replicate ('?');
    239      1.1  mrg 
    240      1.1  mrg   unsigned int misalign;
    241      1.1  mrg   const word_type *p;
    242      1.1  mrg   word_type val, t;
    243      1.1  mrg 
    244      1.1  mrg   /* Align the buffer.  Mask out any bytes from before the beginning.  */
    245      1.1  mrg   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
    246      1.1  mrg   val = *p;
    247      1.1  mrg   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
    248      1.1  mrg   if (misalign)
    249      1.1  mrg     val = acc_char_mask_misalign (val, misalign);
    250      1.1  mrg 
    251      1.1  mrg   /* Main loop.  */
    252      1.1  mrg   while (1)
    253      1.1  mrg     {
    254      1.1  mrg       t  = acc_char_cmp (val, repl_nl);
    255      1.1  mrg       t |= acc_char_cmp (val, repl_cr);
    256      1.1  mrg       t |= acc_char_cmp (val, repl_bs);
    257      1.1  mrg       t |= acc_char_cmp (val, repl_qm);
    258      1.1  mrg 
    259      1.1  mrg       if (__builtin_expect (t != 0, 0))
    260      1.1  mrg 	{
    261      1.1  mrg 	  int i = acc_char_index (t, val);
    262      1.1  mrg 	  if (i >= 0)
    263      1.1  mrg 	    return (const uchar *)p + i;
    264      1.1  mrg 	}
    265      1.1  mrg 
    266      1.1  mrg       val = *++p;
    267      1.1  mrg     }
    268      1.1  mrg }
    269      1.1  mrg 
    270      1.1  mrg /* Disable on Solaris 2/x86 until the following problem can be properly
    271      1.1  mrg    autoconfed:
    272      1.1  mrg 
    273      1.1  mrg    The Solaris 10+ assembler tags objects with the instruction set
    274      1.1  mrg    extensions used, so SSE4.2 executables cannot run on machines that
    275      1.1  mrg    don't support that extension.  */
    276      1.1  mrg 
    277      1.1  mrg #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
    278      1.1  mrg 
/* The four interesting characters, each replicated across one 16-byte
   row, shared between the vector implementations.  Outside of a
   context with vector support we can't define compatible vector
   types, so the rows are plain characters; the 16-byte alignment lets
   each row be read back as a vector.  */
static const char repl_chars[4][16] __attribute__((aligned(16))) = {
  /* [0] newline  */
  { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
  /* [1] carriage return  */
  { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
  /* [2] backslash  */
  { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
  /* [3] question mark  */
  { '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?' },
};
    293      1.1  mrg 
    294      1.1  mrg /* A version of the fast scanner using MMX vectorized byte compare insns.
    295      1.1  mrg 
    296      1.1  mrg    This uses the PMOVMSKB instruction which was introduced with "MMX2",
    297      1.1  mrg    which was packaged into SSE1; it is also present in the AMD MMX
    298      1.1  mrg    extension.  Mark the function as using "sse" so that we emit a real
    299      1.1  mrg    "emms" instruction, rather than the 3dNOW "femms" instruction.  */
    300      1.1  mrg 
    301      1.1  mrg static const uchar *
    302      1.1  mrg #ifndef __SSE__
    303      1.1  mrg __attribute__((__target__("sse")))
    304      1.1  mrg #endif
    305      1.1  mrg search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    306      1.1  mrg {
    307      1.1  mrg   typedef char v8qi __attribute__ ((__vector_size__ (8)));
    308      1.1  mrg   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
    309      1.1  mrg 
    310      1.1  mrg   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
    311      1.1  mrg   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
    312      1.1  mrg   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
    313      1.1  mrg   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
    314      1.1  mrg 
    315      1.1  mrg   unsigned int misalign, found, mask;
    316      1.1  mrg   const v8qi *p;
    317      1.1  mrg   v8qi data, t, c;
    318      1.1  mrg 
    319      1.1  mrg   /* Align the source pointer.  While MMX doesn't generate unaligned data
    320      1.1  mrg      faults, this allows us to safely scan to the end of the buffer without
    321      1.1  mrg      reading beyond the end of the last page.  */
    322      1.1  mrg   misalign = (uintptr_t)s & 7;
    323      1.1  mrg   p = (const v8qi *)((uintptr_t)s & -8);
    324      1.1  mrg   data = *p;
    325      1.1  mrg 
    326      1.1  mrg   /* Create a mask for the bytes that are valid within the first
    327      1.1  mrg      16-byte block.  The Idea here is that the AND with the mask
    328      1.1  mrg      within the loop is "free", since we need some AND or TEST
    329      1.1  mrg      insn in order to set the flags for the branch anyway.  */
    330      1.1  mrg   mask = -1u << misalign;
    331      1.1  mrg 
    332      1.1  mrg   /* Main loop processing 8 bytes at a time.  */
    333      1.1  mrg   goto start;
    334      1.1  mrg   do
    335      1.1  mrg     {
    336      1.1  mrg       data = *++p;
    337      1.1  mrg       mask = -1;
    338      1.1  mrg 
    339      1.1  mrg     start:
    340      1.1  mrg       t = __builtin_ia32_pcmpeqb(data, repl_nl);
    341      1.1  mrg       c = __builtin_ia32_pcmpeqb(data, repl_cr);
    342      1.1  mrg       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
    343      1.1  mrg       c = __builtin_ia32_pcmpeqb(data, repl_bs);
    344      1.1  mrg       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
    345      1.1  mrg       c = __builtin_ia32_pcmpeqb(data, repl_qm);
    346      1.1  mrg       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
    347      1.1  mrg       found = __builtin_ia32_pmovmskb (t);
    348      1.1  mrg       found &= mask;
    349      1.1  mrg     }
    350      1.1  mrg   while (!found);
    351      1.1  mrg 
    352      1.1  mrg   __builtin_ia32_emms ();
    353      1.1  mrg 
    354      1.1  mrg   /* FOUND contains 1 in bits for which we matched a relevant
    355      1.1  mrg      character.  Conversion to the byte index is trivial.  */
    356      1.1  mrg   found = __builtin_ctz(found);
    357      1.1  mrg   return (const uchar *)p + found;
    358      1.1  mrg }
    359      1.1  mrg 
    360      1.1  mrg /* A version of the fast scanner using SSE2 vectorized byte compare insns.  */
    361      1.1  mrg 
    362      1.1  mrg static const uchar *
    363      1.1  mrg #ifndef __SSE2__
    364      1.1  mrg __attribute__((__target__("sse2")))
    365      1.1  mrg #endif
    366      1.1  mrg search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    367      1.1  mrg {
    368      1.1  mrg   typedef char v16qi __attribute__ ((__vector_size__ (16)));
    369      1.1  mrg 
    370      1.1  mrg   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
    371      1.1  mrg   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
    372      1.1  mrg   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
    373      1.1  mrg   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
    374      1.1  mrg 
    375      1.1  mrg   unsigned int misalign, found, mask;
    376      1.1  mrg   const v16qi *p;
    377      1.1  mrg   v16qi data, t;
    378      1.1  mrg 
    379      1.1  mrg   /* Align the source pointer.  */
    380      1.1  mrg   misalign = (uintptr_t)s & 15;
    381      1.1  mrg   p = (const v16qi *)((uintptr_t)s & -16);
    382      1.1  mrg   data = *p;
    383      1.1  mrg 
    384      1.1  mrg   /* Create a mask for the bytes that are valid within the first
    385      1.1  mrg      16-byte block.  The Idea here is that the AND with the mask
    386      1.1  mrg      within the loop is "free", since we need some AND or TEST
    387      1.1  mrg      insn in order to set the flags for the branch anyway.  */
    388      1.1  mrg   mask = -1u << misalign;
    389      1.1  mrg 
    390      1.1  mrg   /* Main loop processing 16 bytes at a time.  */
    391      1.1  mrg   goto start;
    392      1.1  mrg   do
    393      1.1  mrg     {
    394      1.1  mrg       data = *++p;
    395      1.1  mrg       mask = -1;
    396      1.1  mrg 
    397      1.1  mrg     start:
    398      1.1  mrg       t  = data == repl_nl;
    399      1.1  mrg       t |= data == repl_cr;
    400      1.1  mrg       t |= data == repl_bs;
    401      1.1  mrg       t |= data == repl_qm;
    402      1.1  mrg       found = __builtin_ia32_pmovmskb128 (t);
    403      1.1  mrg       found &= mask;
    404      1.1  mrg     }
    405      1.1  mrg   while (!found);
    406      1.1  mrg 
    407      1.1  mrg   /* FOUND contains 1 in bits for which we matched a relevant
    408      1.1  mrg      character.  Conversion to the byte index is trivial.  */
    409      1.1  mrg   found = __builtin_ctz(found);
    410      1.1  mrg   return (const uchar *)p + found;
    411      1.1  mrg }
    412      1.1  mrg 
    413      1.1  mrg #ifdef HAVE_SSE4
    414      1.1  mrg /* A version of the fast scanner using SSE 4.2 vectorized string insns.  */
    415      1.1  mrg 
    416      1.1  mrg static const uchar *
    417      1.1  mrg #ifndef __SSE4_2__
    418      1.1  mrg __attribute__((__target__("sse4.2")))
    419      1.1  mrg #endif
    420      1.1  mrg search_line_sse42 (const uchar *s, const uchar *end)
    421      1.1  mrg {
    422      1.1  mrg   typedef char v16qi __attribute__ ((__vector_size__ (16)));
    423      1.1  mrg   static const v16qi search = { '\n', '\r', '?', '\\' };
    424      1.1  mrg 
    425      1.1  mrg   uintptr_t si = (uintptr_t)s;
    426      1.1  mrg   uintptr_t index;
    427      1.1  mrg 
    428      1.1  mrg   /* Check for unaligned input.  */
    429      1.1  mrg   if (si & 15)
    430      1.1  mrg     {
    431      1.1  mrg       v16qi sv;
    432      1.1  mrg 
    433      1.1  mrg       if (__builtin_expect (end - s < 16, 0)
    434      1.1  mrg 	  && __builtin_expect ((si & 0xfff) > 0xff0, 0))
    435      1.1  mrg 	{
    436      1.1  mrg 	  /* There are less than 16 bytes left in the buffer, and less
    437      1.1  mrg 	     than 16 bytes left on the page.  Reading 16 bytes at this
    438      1.1  mrg 	     point might generate a spurious page fault.  Defer to the
    439      1.1  mrg 	     SSE2 implementation, which already handles alignment.  */
    440      1.1  mrg 	  return search_line_sse2 (s, end);
    441      1.1  mrg 	}
    442      1.1  mrg 
    443      1.1  mrg       /* ??? The builtin doesn't understand that the PCMPESTRI read from
    444      1.1  mrg 	 memory need not be aligned.  */
    445      1.1  mrg       sv = __builtin_ia32_loaddqu ((const char *) s);
    446      1.1  mrg       index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
    447      1.1  mrg 
    448      1.1  mrg       if (__builtin_expect (index < 16, 0))
    449      1.1  mrg 	goto found;
    450      1.1  mrg 
    451      1.1  mrg       /* Advance the pointer to an aligned address.  We will re-scan a
    452      1.1  mrg 	 few bytes, but we no longer need care for reading past the
    453      1.1  mrg 	 end of a page, since we're guaranteed a match.  */
    454      1.1  mrg       s = (const uchar *)((si + 15) & -16);
    455      1.1  mrg     }
    456      1.1  mrg 
    457      1.1  mrg   /* Main loop, processing 16 bytes at a time.  */
    458      1.1  mrg #ifdef __GCC_ASM_FLAG_OUTPUTS__
    459      1.1  mrg   while (1)
    460      1.1  mrg     {
    461      1.1  mrg       char f;
    462      1.1  mrg 
    463      1.1  mrg       /* By using inline assembly instead of the builtin,
    464      1.1  mrg 	 we can use the result, as well as the flags set.  */
    465      1.1  mrg       __asm ("%vpcmpestri\t$0, %2, %3"
    466      1.1  mrg 	     : "=c"(index), "=@ccc"(f)
    467      1.1  mrg 	     : "m"(*s), "x"(search), "a"(4), "d"(16));
    468      1.1  mrg       if (f)
    469      1.1  mrg 	break;
    470      1.1  mrg 
    471      1.1  mrg       s += 16;
    472      1.1  mrg     }
    473      1.1  mrg #else
    474      1.1  mrg   s -= 16;
    475      1.1  mrg   /* By doing the whole loop in inline assembly,
    476      1.1  mrg      we can make proper use of the flags set.  */
    477      1.1  mrg   __asm (      ".balign 16\n"
    478      1.1  mrg 	"0:	add $16, %1\n"
    479      1.1  mrg 	"	%vpcmpestri\t$0, (%1), %2\n"
    480      1.1  mrg 	"	jnc 0b"
    481      1.1  mrg 	: "=&c"(index), "+r"(s)
    482      1.1  mrg 	: "x"(search), "a"(4), "d"(16));
    483      1.1  mrg #endif
    484      1.1  mrg 
    485      1.1  mrg  found:
    486      1.1  mrg   return s + index;
    487      1.1  mrg }
    488      1.1  mrg 
    489      1.1  mrg #else
    490      1.1  mrg /* Work around out-dated assemblers without sse4 support.  */
    491      1.1  mrg #define search_line_sse42 search_line_sse2
    492      1.1  mrg #endif
    493      1.1  mrg 
    494      1.1  mrg /* Check the CPU capabilities.  */
    495      1.1  mrg 
    496      1.1  mrg #include "../gcc/config/i386/cpuid.h"
    497      1.1  mrg 
    498      1.1  mrg typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
    499      1.1  mrg static search_line_fast_type search_line_fast;
    500      1.1  mrg 
    501      1.1  mrg #define HAVE_init_vectorized_lexer 1
    502      1.1  mrg static inline void
    503      1.1  mrg init_vectorized_lexer (void)
    504      1.1  mrg {
    505      1.1  mrg   unsigned dummy, ecx = 0, edx = 0;
    506      1.1  mrg   search_line_fast_type impl = search_line_acc_char;
    507      1.1  mrg   int minimum = 0;
    508      1.1  mrg 
    509      1.1  mrg #if defined(__SSE4_2__)
    510      1.1  mrg   minimum = 3;
    511      1.1  mrg #elif defined(__SSE2__)
    512      1.1  mrg   minimum = 2;
    513      1.1  mrg #elif defined(__SSE__)
    514      1.1  mrg   minimum = 1;
    515      1.1  mrg #endif
    516      1.1  mrg 
    517      1.1  mrg   if (minimum == 3)
    518      1.1  mrg     impl = search_line_sse42;
    519      1.1  mrg   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
    520      1.1  mrg     {
    521      1.1  mrg       if (minimum == 3 || (ecx & bit_SSE4_2))
    522      1.1  mrg         impl = search_line_sse42;
    523      1.1  mrg       else if (minimum == 2 || (edx & bit_SSE2))
    524      1.1  mrg 	impl = search_line_sse2;
    525      1.1  mrg       else if (minimum == 1 || (edx & bit_SSE))
    526      1.1  mrg 	impl = search_line_mmx;
    527      1.1  mrg     }
    528      1.1  mrg   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
    529      1.1  mrg     {
    530      1.1  mrg       if (minimum == 1
    531      1.1  mrg 	  || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
    532      1.1  mrg 	impl = search_line_mmx;
    533      1.1  mrg     }
    534      1.1  mrg 
    535      1.1  mrg   search_line_fast = impl;
    536      1.1  mrg }
    537      1.1  mrg 
    538      1.1  mrg #elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
    539      1.1  mrg 
    540      1.1  mrg /* A vection of the fast scanner using AltiVec vectorized byte compares
    541      1.1  mrg    and VSX unaligned loads (when VSX is available).  This is otherwise
    542      1.1  mrg    the same as the AltiVec version.  */
    543      1.1  mrg 
    544      1.1  mrg ATTRIBUTE_NO_SANITIZE_UNDEFINED
    545      1.1  mrg static const uchar *
    546      1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    547      1.1  mrg {
    548      1.1  mrg   typedef __attribute__((altivec(vector))) unsigned char vc;
    549      1.1  mrg 
    550      1.1  mrg   const vc repl_nl = {
    551      1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    552      1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
    553      1.1  mrg   };
    554      1.1  mrg   const vc repl_cr = {
    555      1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    556      1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
    557      1.1  mrg   };
    558      1.1  mrg   const vc repl_bs = {
    559      1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    560      1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
    561      1.1  mrg   };
    562      1.1  mrg   const vc repl_qm = {
    563      1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
    564      1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
    565      1.1  mrg   };
    566      1.1  mrg   const vc zero = { 0 };
    567      1.1  mrg 
    568      1.1  mrg   vc data, t;
    569      1.1  mrg 
    570      1.1  mrg   /* Main loop processing 16 bytes at a time.  */
    571      1.1  mrg   do
    572      1.1  mrg     {
    573      1.1  mrg       vc m_nl, m_cr, m_bs, m_qm;
    574      1.1  mrg 
    575      1.1  mrg       data = __builtin_vec_vsx_ld (0, s);
    576      1.1  mrg       s += 16;
    577      1.1  mrg 
    578      1.1  mrg       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
    579      1.1  mrg       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
    580      1.1  mrg       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
    581      1.1  mrg       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
    582      1.1  mrg       t = (m_nl | m_cr) | (m_bs | m_qm);
    583      1.1  mrg 
    584      1.1  mrg       /* T now contains 0xff in bytes for which we matched one of the relevant
    585      1.1  mrg 	 characters.  We want to exit the loop if any byte in T is non-zero.
    586      1.1  mrg 	 Below is the expansion of vec_any_ne(t, zero).  */
    587      1.1  mrg     }
    588      1.1  mrg   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
    589      1.1  mrg 
    590      1.1  mrg   /* Restore s to to point to the 16 bytes we just processed.  */
    591      1.1  mrg   s -= 16;
    592      1.1  mrg 
    593      1.1  mrg   {
    594      1.1  mrg #define N  (sizeof(vc) / sizeof(long))
    595      1.1  mrg 
    596      1.1  mrg     union {
    597      1.1  mrg       vc v;
    598      1.1  mrg       /* Statically assert that N is 2 or 4.  */
    599      1.1  mrg       unsigned long l[(N == 2 || N == 4) ? N : -1];
    600      1.1  mrg     } u;
    601      1.1  mrg     unsigned long l, i = 0;
    602      1.1  mrg 
    603      1.1  mrg     u.v = t;
    604      1.1  mrg 
    605      1.1  mrg     /* Find the first word of T that is non-zero.  */
    606      1.1  mrg     switch (N)
    607      1.1  mrg       {
    608      1.1  mrg       case 4:
    609      1.1  mrg 	l = u.l[i++];
    610      1.1  mrg 	if (l != 0)
    611      1.1  mrg 	  break;
    612      1.1  mrg 	s += sizeof(unsigned long);
    613      1.1  mrg 	l = u.l[i++];
    614      1.1  mrg 	if (l != 0)
    615      1.1  mrg 	  break;
    616      1.1  mrg 	s += sizeof(unsigned long);
    617      1.1  mrg 	/* FALLTHRU */
    618      1.1  mrg       case 2:
    619      1.1  mrg 	l = u.l[i++];
    620      1.1  mrg 	if (l != 0)
    621      1.1  mrg 	  break;
    622      1.1  mrg 	s += sizeof(unsigned long);
    623      1.1  mrg 	l = u.l[i];
    624      1.1  mrg       }
    625      1.1  mrg 
    626      1.1  mrg     /* L now contains 0xff in bytes for which we matched one of the
    627      1.1  mrg        relevant characters.  We can find the byte index by finding
    628      1.1  mrg        its bit index and dividing by 8.  */
    629      1.1  mrg #ifdef __BIG_ENDIAN__
    630      1.1  mrg     l = __builtin_clzl(l) >> 3;
    631      1.1  mrg #else
    632      1.1  mrg     l = __builtin_ctzl(l) >> 3;
    633      1.1  mrg #endif
    634      1.1  mrg     return s + l;
    635      1.1  mrg 
    636      1.1  mrg #undef N
    637      1.1  mrg   }
    638      1.1  mrg }
    639      1.1  mrg 
    640      1.1  mrg #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
    641      1.1  mrg 
    642      1.1  mrg /* A vection of the fast scanner using AltiVec vectorized byte compares.
    643      1.1  mrg    This cannot be used for little endian because vec_lvsl/lvsr are
    644      1.1  mrg    deprecated for little endian and the code won't work properly.  */
    645      1.1  mrg /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
    646      1.1  mrg    so we can't compile this function without -maltivec on the command line
    647      1.1  mrg    (or implied by some other switch).  */
    648      1.1  mrg 
    649      1.1  mrg static const uchar *
    650      1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    651      1.1  mrg {
    652      1.1  mrg   typedef __attribute__((altivec(vector))) unsigned char vc;
    653      1.1  mrg 
    654      1.1  mrg   const vc repl_nl = {
    655      1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    656      1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
    657      1.1  mrg   };
    658      1.1  mrg   const vc repl_cr = {
    659      1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    660      1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
    661      1.1  mrg   };
    662      1.1  mrg   const vc repl_bs = {
    663      1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    664      1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
    665      1.1  mrg   };
    666      1.1  mrg   const vc repl_qm = {
    667      1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
    668      1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
    669      1.1  mrg   };
    670      1.1  mrg   const vc ones = {
    671      1.1  mrg     -1, -1, -1, -1, -1, -1, -1, -1,
    672      1.1  mrg     -1, -1, -1, -1, -1, -1, -1, -1,
    673      1.1  mrg   };
    674      1.1  mrg   const vc zero = { 0 };
    675      1.1  mrg 
    676      1.1  mrg   vc data, mask, t;
    677      1.1  mrg 
    678      1.1  mrg   /* Altivec loads automatically mask addresses with -16.  This lets us
    679      1.1  mrg      issue the first load as early as possible.  */
    680      1.1  mrg   data = __builtin_vec_ld(0, (const vc *)s);
    681      1.1  mrg 
    682      1.1  mrg   /* Discard bytes before the beginning of the buffer.  Do this by
    683      1.1  mrg      beginning with all ones and shifting in zeros according to the
    684      1.1  mrg      mis-alignment.  The LVSR instruction pulls the exact shift we
    685      1.1  mrg      want from the address.  */
    686      1.1  mrg   mask = __builtin_vec_lvsr(0, s);
    687      1.1  mrg   mask = __builtin_vec_perm(zero, ones, mask);
    688      1.1  mrg   data &= mask;
    689      1.1  mrg 
    690      1.1  mrg   /* While altivec loads mask addresses, we still need to align S so
    691      1.1  mrg      that the offset we compute at the end is correct.  */
    692      1.1  mrg   s = (const uchar *)((uintptr_t)s & -16);
    693      1.1  mrg 
    694      1.1  mrg   /* Main loop processing 16 bytes at a time.  */
    695      1.1  mrg   goto start;
    696      1.1  mrg   do
    697      1.1  mrg     {
    698      1.1  mrg       vc m_nl, m_cr, m_bs, m_qm;
    699      1.1  mrg 
    700      1.1  mrg       s += 16;
    701      1.1  mrg       data = __builtin_vec_ld(0, (const vc *)s);
    702      1.1  mrg 
    703      1.1  mrg     start:
    704      1.1  mrg       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
    705      1.1  mrg       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
    706      1.1  mrg       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
    707      1.1  mrg       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
    708      1.1  mrg       t = (m_nl | m_cr) | (m_bs | m_qm);
    709      1.1  mrg 
    710      1.1  mrg       /* T now contains 0xff in bytes for which we matched one of the relevant
    711      1.1  mrg 	 characters.  We want to exit the loop if any byte in T is non-zero.
    712      1.1  mrg 	 Below is the expansion of vec_any_ne(t, zero).  */
    713      1.1  mrg     }
    714      1.1  mrg   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
    715      1.1  mrg 
    716      1.1  mrg   {
    717      1.1  mrg #define N  (sizeof(vc) / sizeof(long))
    718      1.1  mrg 
    719      1.1  mrg     union {
    720      1.1  mrg       vc v;
    721      1.1  mrg       /* Statically assert that N is 2 or 4.  */
    722      1.1  mrg       unsigned long l[(N == 2 || N == 4) ? N : -1];
    723      1.1  mrg     } u;
    724      1.1  mrg     unsigned long l, i = 0;
    725      1.1  mrg 
    726      1.1  mrg     u.v = t;
    727      1.1  mrg 
    728      1.1  mrg     /* Find the first word of T that is non-zero.  */
    729      1.1  mrg     switch (N)
    730      1.1  mrg       {
    731      1.1  mrg       case 4:
    732      1.1  mrg 	l = u.l[i++];
    733      1.1  mrg 	if (l != 0)
    734      1.1  mrg 	  break;
    735      1.1  mrg 	s += sizeof(unsigned long);
    736      1.1  mrg 	l = u.l[i++];
    737      1.1  mrg 	if (l != 0)
    738      1.1  mrg 	  break;
    739      1.1  mrg 	s += sizeof(unsigned long);
    740      1.1  mrg 	/* FALLTHROUGH */
    741      1.1  mrg       case 2:
    742      1.1  mrg 	l = u.l[i++];
    743      1.1  mrg 	if (l != 0)
    744      1.1  mrg 	  break;
    745      1.1  mrg 	s += sizeof(unsigned long);
    746      1.1  mrg 	l = u.l[i];
    747      1.1  mrg       }
    748      1.1  mrg 
    749      1.1  mrg     /* L now contains 0xff in bytes for which we matched one of the
    750      1.1  mrg        relevant characters.  We can find the byte index by finding
    751      1.1  mrg        its bit index and dividing by 8.  */
    752      1.1  mrg     l = __builtin_clzl(l) >> 3;
    753      1.1  mrg     return s + l;
    754      1.1  mrg 
    755      1.1  mrg #undef N
    756      1.1  mrg   }
    757      1.1  mrg }
    758      1.1  mrg 
    759      1.1  mrg #elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
    760      1.1  mrg #include "arm_neon.h"
    761      1.1  mrg 
    762      1.1  mrg /* This doesn't have to be the exact page size, but no system may use
    763      1.1  mrg    a size smaller than this.  ARMv8 requires a minimum page size of
    764      1.1  mrg    4k.  The impact of being conservative here is a small number of
    765      1.1  mrg    cases will take the slightly slower entry path into the main
    766      1.1  mrg    loop.  */
    767      1.1  mrg 
    768      1.1  mrg #define AARCH64_MIN_PAGE_SIZE 4096
    769      1.1  mrg 
    770      1.1  mrg static const uchar *
    771      1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    772      1.1  mrg {
    773      1.1  mrg   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
    774      1.1  mrg   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
    775      1.1  mrg   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
    776      1.1  mrg   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
    777      1.1  mrg   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
    778      1.1  mrg 
    779      1.1  mrg #ifdef __ARM_BIG_ENDIAN
    780      1.1  mrg   const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
    781      1.1  mrg #else
    782      1.1  mrg   const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
    783      1.1  mrg #endif
    784      1.1  mrg 
    785      1.1  mrg   unsigned int found;
    786      1.1  mrg   const uint8_t *p;
    787      1.1  mrg   uint8x16_t data;
    788      1.1  mrg   uint8x16_t t;
    789      1.1  mrg   uint16x8_t m;
    790      1.1  mrg   uint8x16_t u, v, w;
    791      1.1  mrg 
    792      1.1  mrg   /* Align the source pointer.  */
    793      1.1  mrg   p = (const uint8_t *)((uintptr_t)s & -16);
    794      1.1  mrg 
    795      1.1  mrg   /* Assuming random string start positions, with a 4k page size we'll take
    796      1.1  mrg      the slow path about 0.37% of the time.  */
    797      1.1  mrg   if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
    798      1.1  mrg 			 - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
    799      1.1  mrg 			< 16, 0))
    800      1.1  mrg     {
    801      1.1  mrg       /* Slow path: the string starts near a possible page boundary.  */
    802      1.1  mrg       uint32_t misalign, mask;
    803      1.1  mrg 
    804      1.1  mrg       misalign = (uintptr_t)s & 15;
    805      1.1  mrg       mask = (-1u << misalign) & 0xffff;
    806      1.1  mrg       data = vld1q_u8 (p);
    807      1.1  mrg       t = vceqq_u8 (data, repl_nl);
    808      1.1  mrg       u = vceqq_u8 (data, repl_cr);
    809      1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
    810      1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
    811      1.1  mrg       t = vorrq_u8 (v, w);
    812      1.1  mrg       t = vandq_u8 (t, xmask);
    813      1.1  mrg       m = vpaddlq_u8 (t);
    814      1.1  mrg       m = vshlq_u16 (m, shift);
    815      1.1  mrg       found = vaddvq_u16 (m);
    816      1.1  mrg       found &= mask;
    817      1.1  mrg       if (found)
    818      1.1  mrg 	return (const uchar*)p + __builtin_ctz (found);
    819      1.1  mrg     }
    820      1.1  mrg   else
    821      1.1  mrg     {
    822      1.1  mrg       data = vld1q_u8 ((const uint8_t *) s);
    823      1.1  mrg       t = vceqq_u8 (data, repl_nl);
    824      1.1  mrg       u = vceqq_u8 (data, repl_cr);
    825      1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
    826      1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
    827      1.1  mrg       t = vorrq_u8 (v, w);
    828      1.1  mrg       if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
    829      1.1  mrg 	goto done;
    830      1.1  mrg     }
    831      1.1  mrg 
    832      1.1  mrg   do
    833      1.1  mrg     {
    834      1.1  mrg       p += 16;
    835      1.1  mrg       data = vld1q_u8 (p);
    836      1.1  mrg       t = vceqq_u8 (data, repl_nl);
    837      1.1  mrg       u = vceqq_u8 (data, repl_cr);
    838      1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
    839      1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
    840      1.1  mrg       t = vorrq_u8 (v, w);
    841      1.1  mrg     } while (!vpaddd_u64 ((uint64x2_t)t));
    842      1.1  mrg 
    843      1.1  mrg done:
    844      1.1  mrg   /* Now that we've found the terminating substring, work out precisely where
    845      1.1  mrg      we need to stop.  */
    846      1.1  mrg   t = vandq_u8 (t, xmask);
    847      1.1  mrg   m = vpaddlq_u8 (t);
    848      1.1  mrg   m = vshlq_u16 (m, shift);
    849      1.1  mrg   found = vaddvq_u16 (m);
    850      1.1  mrg   return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
    851      1.1  mrg 	  + __builtin_ctz (found));
    852      1.1  mrg }
    853      1.1  mrg 
    854      1.1  mrg #elif defined (__ARM_NEON)
    855      1.1  mrg #include "arm_neon.h"
    856      1.1  mrg 
    857      1.1  mrg static const uchar *
    858      1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    859      1.1  mrg {
    860      1.1  mrg   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
    861      1.1  mrg   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
    862      1.1  mrg   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
    863      1.1  mrg   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
    864      1.1  mrg   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
    865      1.1  mrg 
    866      1.1  mrg   unsigned int misalign, found, mask;
    867      1.1  mrg   const uint8_t *p;
    868      1.1  mrg   uint8x16_t data;
    869      1.1  mrg 
    870      1.1  mrg   /* Align the source pointer.  */
    871      1.1  mrg   misalign = (uintptr_t)s & 15;
    872      1.1  mrg   p = (const uint8_t *)((uintptr_t)s & -16);
    873      1.1  mrg   data = vld1q_u8 (p);
    874      1.1  mrg 
    875      1.1  mrg   /* Create a mask for the bytes that are valid within the first
    876      1.1  mrg      16-byte block.  The Idea here is that the AND with the mask
    877      1.1  mrg      within the loop is "free", since we need some AND or TEST
    878      1.1  mrg      insn in order to set the flags for the branch anyway.  */
    879      1.1  mrg   mask = (-1u << misalign) & 0xffff;
    880      1.1  mrg 
    881      1.1  mrg   /* Main loop, processing 16 bytes at a time.  */
    882      1.1  mrg   goto start;
    883      1.1  mrg 
    884      1.1  mrg   do
    885      1.1  mrg     {
    886      1.1  mrg       uint8x8_t l;
    887      1.1  mrg       uint16x4_t m;
    888      1.1  mrg       uint32x2_t n;
    889      1.1  mrg       uint8x16_t t, u, v, w;
    890      1.1  mrg 
    891      1.1  mrg       p += 16;
    892      1.1  mrg       data = vld1q_u8 (p);
    893      1.1  mrg       mask = 0xffff;
    894      1.1  mrg 
    895      1.1  mrg     start:
    896      1.1  mrg       t = vceqq_u8 (data, repl_nl);
    897      1.1  mrg       u = vceqq_u8 (data, repl_cr);
    898      1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
    899      1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
    900      1.1  mrg       t = vandq_u8 (vorrq_u8 (v, w), xmask);
    901      1.1  mrg       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
    902      1.1  mrg       m = vpaddl_u8 (l);
    903      1.1  mrg       n = vpaddl_u16 (m);
    904      1.1  mrg 
    905      1.1  mrg       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
    906      1.1  mrg 	      vshr_n_u64 ((uint64x1_t) n, 24)), 0);
    907      1.1  mrg       found &= mask;
    908      1.1  mrg     }
    909      1.1  mrg   while (!found);
    910      1.1  mrg 
    911      1.1  mrg   /* FOUND contains 1 in bits for which we matched a relevant
    912      1.1  mrg      character.  Conversion to the byte index is trivial.  */
    913      1.1  mrg   found = __builtin_ctz (found);
    914      1.1  mrg   return (const uchar *)p + found;
    915      1.1  mrg }
    916      1.1  mrg 
    917      1.1  mrg #else
    918      1.1  mrg 
    919      1.1  mrg /* We only have one accelerated alternative.  Use a direct call so that
    920      1.1  mrg    we encourage inlining.  */
    921      1.1  mrg 
    922      1.1  mrg #define search_line_fast  search_line_acc_char
    923      1.1  mrg 
    924      1.1  mrg #endif
    925      1.1  mrg 
/* One-time lexer setup.  When a vectorized lexer initializer exists
   (signalled by HAVE_init_vectorized_lexer) it is run; otherwise there
   is nothing to do.  */

void
_cpp_init_lexer (void)
{
#if defined (HAVE_init_vectorized_lexer)
  init_vectorized_lexer ();
#endif
}
    935      1.1  mrg 
    936      1.1  mrg /* Returns with a logical line that contains no escaped newlines or
    937      1.1  mrg    trigraphs.  This is a time-critical inner loop.  */
    938      1.1  mrg void
    939      1.1  mrg _cpp_clean_line (cpp_reader *pfile)
    940      1.1  mrg {
    941      1.1  mrg   cpp_buffer *buffer;
    942      1.1  mrg   const uchar *s;
    943      1.1  mrg   uchar c, *d, *p;
    944      1.1  mrg 
    945      1.1  mrg   buffer = pfile->buffer;
    946      1.1  mrg   buffer->cur_note = buffer->notes_used = 0;
    947      1.1  mrg   buffer->cur = buffer->line_base = buffer->next_line;
    948      1.1  mrg   buffer->need_line = false;
    949      1.1  mrg   s = buffer->next_line;
    950      1.1  mrg 
    951      1.1  mrg   if (!buffer->from_stage3)
    952      1.1  mrg     {
    953      1.1  mrg       const uchar *pbackslash = NULL;
    954      1.1  mrg 
    955      1.1  mrg       /* Fast path.  This is the common case of an un-escaped line with
    956      1.1  mrg 	 no trigraphs.  The primary win here is by not writing any
    957      1.1  mrg 	 data back to memory until we have to.  */
    958      1.1  mrg       while (1)
    959      1.1  mrg 	{
    960      1.1  mrg 	  /* Perform an optimized search for \n, \r, \\, ?.  */
    961      1.1  mrg 	  s = search_line_fast (s, buffer->rlimit);
    962      1.1  mrg 
    963      1.1  mrg 	  c = *s;
    964      1.1  mrg 	  if (c == '\\')
    965      1.1  mrg 	    {
    966      1.1  mrg 	      /* Record the location of the backslash and continue.  */
    967      1.1  mrg 	      pbackslash = s++;
    968      1.1  mrg 	    }
    969      1.1  mrg 	  else if (__builtin_expect (c == '?', 0))
    970      1.1  mrg 	    {
    971      1.1  mrg 	      if (__builtin_expect (s[1] == '?', false)
    972      1.1  mrg 		   && _cpp_trigraph_map[s[2]])
    973      1.1  mrg 		{
    974      1.1  mrg 		  /* Have a trigraph.  We may or may not have to convert
    975      1.1  mrg 		     it.  Add a line note regardless, for -Wtrigraphs.  */
    976      1.1  mrg 		  add_line_note (buffer, s, s[2]);
    977      1.1  mrg 		  if (CPP_OPTION (pfile, trigraphs))
    978      1.1  mrg 		    {
    979      1.1  mrg 		      /* We do, and that means we have to switch to the
    980      1.1  mrg 		         slow path.  */
    981      1.1  mrg 		      d = (uchar *) s;
    982      1.1  mrg 		      *d = _cpp_trigraph_map[s[2]];
    983      1.1  mrg 		      s += 2;
    984      1.1  mrg 		      goto slow_path;
    985      1.1  mrg 		    }
    986      1.1  mrg 		}
    987      1.1  mrg 	      /* Not a trigraph.  Continue on fast-path.  */
    988      1.1  mrg 	      s++;
    989      1.1  mrg 	    }
    990      1.1  mrg 	  else
    991      1.1  mrg 	    break;
    992      1.1  mrg 	}
    993      1.1  mrg 
    994      1.1  mrg       /* This must be \r or \n.  We're either done, or we'll be forced
    995      1.1  mrg 	 to write back to the buffer and continue on the slow path.  */
    996      1.1  mrg       d = (uchar *) s;
    997      1.1  mrg 
    998      1.1  mrg       if (__builtin_expect (s == buffer->rlimit, false))
    999      1.1  mrg 	goto done;
   1000      1.1  mrg 
   1001      1.1  mrg       /* DOS line ending? */
   1002      1.1  mrg       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
   1003      1.1  mrg 	{
   1004      1.1  mrg 	  s++;
   1005      1.1  mrg 	  if (s == buffer->rlimit)
   1006      1.1  mrg 	    goto done;
   1007      1.1  mrg 	}
   1008      1.1  mrg 
   1009      1.1  mrg       if (__builtin_expect (pbackslash == NULL, true))
   1010      1.1  mrg 	goto done;
   1011      1.1  mrg 
   1012      1.1  mrg       /* Check for escaped newline.  */
   1013      1.1  mrg       p = d;
   1014      1.1  mrg       while (is_nvspace (p[-1]))
   1015      1.1  mrg 	p--;
   1016      1.1  mrg       if (p - 1 != pbackslash)
   1017      1.1  mrg 	goto done;
   1018      1.1  mrg 
   1019      1.1  mrg       /* Have an escaped newline; process it and proceed to
   1020      1.1  mrg 	 the slow path.  */
   1021      1.1  mrg       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
   1022      1.1  mrg       d = p - 2;
   1023      1.1  mrg       buffer->next_line = p - 1;
   1024      1.1  mrg 
   1025      1.1  mrg     slow_path:
   1026      1.1  mrg       while (1)
   1027      1.1  mrg 	{
   1028      1.1  mrg 	  c = *++s;
   1029      1.1  mrg 	  *++d = c;
   1030      1.1  mrg 
   1031      1.1  mrg 	  if (c == '\n' || c == '\r')
   1032      1.1  mrg 	    {
   1033      1.1  mrg 	      /* Handle DOS line endings.  */
   1034      1.1  mrg 	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
   1035      1.1  mrg 		s++;
   1036      1.1  mrg 	      if (s == buffer->rlimit)
   1037      1.1  mrg 		break;
   1038      1.1  mrg 
   1039      1.1  mrg 	      /* Escaped?  */
   1040      1.1  mrg 	      p = d;
   1041      1.1  mrg 	      while (p != buffer->next_line && is_nvspace (p[-1]))
   1042      1.1  mrg 		p--;
   1043      1.1  mrg 	      if (p == buffer->next_line || p[-1] != '\\')
   1044      1.1  mrg 		break;
   1045      1.1  mrg 
   1046      1.1  mrg 	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
   1047      1.1  mrg 	      d = p - 2;
   1048      1.1  mrg 	      buffer->next_line = p - 1;
   1049      1.1  mrg 	    }
   1050      1.1  mrg 	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
   1051      1.1  mrg 	    {
   1052      1.1  mrg 	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
   1053      1.1  mrg 	      add_line_note (buffer, d, s[2]);
   1054      1.1  mrg 	      if (CPP_OPTION (pfile, trigraphs))
   1055      1.1  mrg 		{
   1056      1.1  mrg 		  *d = _cpp_trigraph_map[s[2]];
   1057      1.1  mrg 		  s += 2;
   1058      1.1  mrg 		}
   1059      1.1  mrg 	    }
   1060      1.1  mrg 	}
   1061      1.1  mrg     }
   1062      1.1  mrg   else
   1063      1.1  mrg     {
   1064      1.1  mrg       while (*s != '\n' && *s != '\r')
   1065      1.1  mrg 	s++;
   1066      1.1  mrg       d = (uchar *) s;
   1067      1.1  mrg 
   1068      1.1  mrg       /* Handle DOS line endings.  */
   1069      1.1  mrg       if (*s == '\r' && s + 1 != buffer->rlimit && s[1] == '\n')
   1070      1.1  mrg 	s++;
   1071      1.1  mrg     }
   1072      1.1  mrg 
   1073      1.1  mrg  done:
   1074      1.1  mrg   *d = '\n';
   1075      1.1  mrg   /* A sentinel note that should never be processed.  */
   1076      1.1  mrg   add_line_note (buffer, d + 1, '\n');
   1077      1.1  mrg   buffer->next_line = s + 1;
   1078      1.1  mrg }
   1079  1.1.1.3  mrg 
   1080  1.1.1.3  mrg template <bool lexing_raw_string>
   1081  1.1.1.3  mrg static bool get_fresh_line_impl (cpp_reader *pfile);
   1082      1.1  mrg 
   1083      1.1  mrg /* Return true if the trigraph indicated by NOTE should be warned
   1084      1.1  mrg    about in a comment.  */
   1085      1.1  mrg static bool
   1086      1.1  mrg warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
   1087      1.1  mrg {
   1088      1.1  mrg   const uchar *p;
   1089      1.1  mrg 
   1090      1.1  mrg   /* Within comments we don't warn about trigraphs, unless the
   1091      1.1  mrg      trigraph forms an escaped newline, as that may change
   1092      1.1  mrg      behavior.  */
   1093      1.1  mrg   if (note->type != '/')
   1094      1.1  mrg     return false;
   1095      1.1  mrg 
   1096      1.1  mrg   /* If -trigraphs, then this was an escaped newline iff the next note
   1097      1.1  mrg      is coincident.  */
   1098      1.1  mrg   if (CPP_OPTION (pfile, trigraphs))
   1099      1.1  mrg     return note[1].pos == note->pos;
   1100      1.1  mrg 
   1101      1.1  mrg   /* Otherwise, see if this forms an escaped newline.  */
   1102      1.1  mrg   p = note->pos + 3;
   1103      1.1  mrg   while (is_nvspace (*p))
   1104      1.1  mrg     p++;
   1105      1.1  mrg 
   1106      1.1  mrg   /* There might have been escaped newlines between the trigraph and the
   1107      1.1  mrg      newline we found.  Hence the position test.  */
   1108      1.1  mrg   return (*p == '\n' && p < note[1].pos);
   1109      1.1  mrg }
   1110      1.1  mrg 
   1111      1.1  mrg /* Process the notes created by add_line_note as far as the current
   1112      1.1  mrg    location.  */
   1113      1.1  mrg void
   1114      1.1  mrg _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
   1115      1.1  mrg {
   1116      1.1  mrg   cpp_buffer *buffer = pfile->buffer;
   1117      1.1  mrg 
   1118      1.1  mrg   for (;;)
   1119      1.1  mrg     {
   1120      1.1  mrg       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
   1121      1.1  mrg       unsigned int col;
   1122      1.1  mrg 
   1123      1.1  mrg       if (note->pos > buffer->cur)
   1124      1.1  mrg 	break;
   1125      1.1  mrg 
   1126      1.1  mrg       buffer->cur_note++;
   1127      1.1  mrg       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
   1128      1.1  mrg 
   1129      1.1  mrg       if (note->type == '\\' || note->type == ' ')
   1130      1.1  mrg 	{
   1131      1.1  mrg 	  if (note->type == ' ' && !in_comment)
   1132      1.1  mrg 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
   1133      1.1  mrg 				 "backslash and newline separated by space");
   1134      1.1  mrg 
   1135      1.1  mrg 	  if (buffer->next_line > buffer->rlimit)
   1136      1.1  mrg 	    {
   1137      1.1  mrg 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
   1138      1.1  mrg 				   "backslash-newline at end of file");
   1139      1.1  mrg 	      /* Prevent "no newline at end of file" warning.  */
   1140      1.1  mrg 	      buffer->next_line = buffer->rlimit;
   1141      1.1  mrg 	    }
   1142      1.1  mrg 
   1143      1.1  mrg 	  buffer->line_base = note->pos;
   1144      1.1  mrg 	  CPP_INCREMENT_LINE (pfile, 0);
   1145      1.1  mrg 	}
   1146      1.1  mrg       else if (_cpp_trigraph_map[note->type])
   1147      1.1  mrg 	{
   1148      1.1  mrg 	  if (CPP_OPTION (pfile, warn_trigraphs)
   1149      1.1  mrg 	      && (!in_comment || warn_in_comment (pfile, note)))
   1150      1.1  mrg 	    {
   1151      1.1  mrg 	      if (CPP_OPTION (pfile, trigraphs))
   1152      1.1  mrg 		cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
   1153      1.1  mrg                                        pfile->line_table->highest_line, col,
   1154      1.1  mrg 				       "trigraph ??%c converted to %c",
   1155      1.1  mrg 				       note->type,
   1156      1.1  mrg 				       (int) _cpp_trigraph_map[note->type]);
   1157      1.1  mrg 	      else
   1158      1.1  mrg 		{
   1159      1.1  mrg 		  cpp_warning_with_line
   1160      1.1  mrg 		    (pfile, CPP_W_TRIGRAPHS,
   1161      1.1  mrg                      pfile->line_table->highest_line, col,
   1162      1.1  mrg 		     "trigraph ??%c ignored, use -trigraphs to enable",
   1163      1.1  mrg 		     note->type);
   1164      1.1  mrg 		}
   1165      1.1  mrg 	    }
   1166      1.1  mrg 	}
   1167      1.1  mrg       else if (note->type == 0)
   1168      1.1  mrg 	/* Already processed in lex_raw_string.  */;
   1169      1.1  mrg       else
   1170      1.1  mrg 	abort ();
   1171      1.1  mrg     }
   1172      1.1  mrg }
   1173      1.1  mrg 
   1174      1.1  mrg namespace bidi {
   1175      1.1  mrg   enum class kind {
   1176      1.1  mrg     NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
   1177      1.1  mrg   };
   1178      1.1  mrg 
   1179      1.1  mrg   /* All the UTF-8 encodings of bidi characters start with E2.  */
   1180      1.1  mrg   constexpr uchar utf8_start = 0xe2;
   1181      1.1  mrg 
   1182      1.1  mrg   struct context
   1183      1.1  mrg   {
   1184      1.1  mrg     context () {}
   1185      1.1  mrg     context (location_t loc, kind k, bool pdf, bool ucn)
   1186      1.1  mrg     : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
   1187      1.1  mrg     {
   1188      1.1  mrg     }
   1189      1.1  mrg 
   1190      1.1  mrg     kind get_pop_kind () const
   1191      1.1  mrg     {
   1192      1.1  mrg       return m_pdf ? kind::PDF : kind::PDI;
   1193      1.1  mrg     }
   1194      1.1  mrg     bool ucn_p () const
   1195      1.1  mrg     {
   1196      1.1  mrg       return m_ucn;
   1197      1.1  mrg     }
   1198      1.1  mrg 
   1199      1.1  mrg     location_t m_loc;
   1200      1.1  mrg     kind m_kind;
   1201      1.1  mrg     unsigned m_pdf : 1;
   1202      1.1  mrg     unsigned m_ucn : 1;
   1203      1.1  mrg   };
   1204      1.1  mrg 
   1205      1.1  mrg   /* A vector holding currently open bidi contexts.  Each element is a
   1206      1.1  mrg      context recording where the opening character was seen, its kind,
   1207      1.1  mrg      whether it is closed by PDF (otherwise by PDI), and whether it was
   1208      1.1  mrg      written as a UCN rather than as UTF-8.  */
   1209      1.1  mrg   semi_embedded_vec <context, 16> vec;
   1210      1.1  mrg 
   1211      1.1  mrg   /* Close the whole comment/identifier/string literal/character constant
   1212      1.1  mrg      context.  */
   1213      1.1  mrg   void on_close ()
   1214      1.1  mrg   {
   1215      1.1  mrg     vec.truncate (0);
   1216      1.1  mrg   }
   1217      1.1  mrg 
   1218      1.1  mrg   /* Pop the last element in the vector.  */
   1219      1.1  mrg   void pop ()
   1220      1.1  mrg   {
   1221      1.1  mrg     unsigned int len = vec.count ();
   1222      1.1  mrg     gcc_checking_assert (len > 0);
   1223      1.1  mrg     vec.truncate (len - 1);
   1224      1.1  mrg   }
   1225      1.1  mrg 
   1226      1.1  mrg   /* Return the pop kind of the context of the Ith element.  */
   1227      1.1  mrg   kind pop_kind_at (unsigned int i)
   1228      1.1  mrg   {
   1229      1.1  mrg     return vec[i].get_pop_kind ();
   1230      1.1  mrg   }
   1231      1.1  mrg 
   1232      1.1  mrg   /* Return the pop kind of the context that is currently opened.  */
   1233      1.1  mrg   kind current_ctx ()
   1234      1.1  mrg   {
   1235      1.1  mrg     unsigned int len = vec.count ();
   1236      1.1  mrg     if (len == 0)
   1237      1.1  mrg       return kind::NONE;
   1238      1.1  mrg     return vec[len - 1].get_pop_kind ();
   1239      1.1  mrg   }
   1240      1.1  mrg 
   1241      1.1  mrg   /* Return true if the current context comes from a UCN origin, that is,
   1242      1.1  mrg      the bidi char which started this bidi context was written as a UCN.  */
   1243      1.1  mrg   bool current_ctx_ucn_p ()
   1244      1.1  mrg   {
   1245      1.1  mrg     unsigned int len = vec.count ();
   1246      1.1  mrg     gcc_checking_assert (len > 0);
   1247      1.1  mrg     return vec[len - 1].m_ucn;
   1248      1.1  mrg   }
   1249      1.1  mrg 
   1250      1.1  mrg   location_t current_ctx_loc ()
   1251      1.1  mrg   {
   1252      1.1  mrg     unsigned int len = vec.count ();
   1253      1.1  mrg     gcc_checking_assert (len > 0);
   1254      1.1  mrg     return vec[len - 1].m_loc;
   1255      1.1  mrg   }
   1256      1.1  mrg 
   1257      1.1  mrg   /* We've read a bidi char, update the current vector as necessary.
   1258      1.1  mrg      LOC is only valid when K is not kind::NONE.  */
   1259      1.1  mrg   void on_char (kind k, bool ucn_p, location_t loc)
   1260      1.1  mrg   {
   1261      1.1  mrg     switch (k)
   1262      1.1  mrg       {
   1263      1.1  mrg       case kind::LRE:
   1264      1.1  mrg       case kind::RLE:
   1265      1.1  mrg       case kind::LRO:
   1266      1.1  mrg       case kind::RLO:
   1267      1.1  mrg 	vec.push (context (loc, k, true, ucn_p));
   1268      1.1  mrg 	break;
   1269      1.1  mrg       case kind::LRI:
   1270      1.1  mrg       case kind::RLI:
   1271      1.1  mrg       case kind::FSI:
   1272      1.1  mrg 	vec.push (context (loc, k, false, ucn_p));
   1273      1.1  mrg 	break;
   1274      1.1  mrg       /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
   1275      1.1  mrg 	 whose scope has not yet been terminated.  */
   1276      1.1  mrg       case kind::PDF:
   1277      1.1  mrg 	if (current_ctx () == kind::PDF)
   1278      1.1  mrg 	  pop ();
   1279      1.1  mrg 	break;
   1280      1.1  mrg       /* PDI terminates the scope of the last LRI, RLI, or FSI whose
   1281      1.1  mrg 	 scope has not yet been terminated, as well as the scopes of
   1282      1.1  mrg 	 any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
   1283      1.1  mrg 	 yet been terminated.  */
   1284      1.1  mrg       case kind::PDI:
   1285      1.1  mrg 	for (int i = vec.count () - 1; i >= 0; --i)
   1286      1.1  mrg 	  if (pop_kind_at (i) == kind::PDI)
   1287      1.1  mrg 	    {
   1288      1.1  mrg 	      vec.truncate (i);
   1289      1.1  mrg 	      break;
   1290      1.1  mrg 	    }
   1291      1.1  mrg 	break;
   1292      1.1  mrg       case kind::LTR:
   1293      1.1  mrg       case kind::RTL:
   1294      1.1  mrg 	/* These aren't popped by a PDF/PDI.  */
   1295      1.1  mrg 	break;
   1296      1.1  mrg       ATTR_LIKELY case kind::NONE:
   1297      1.1  mrg 	break;
   1298      1.1  mrg       default:
   1299      1.1  mrg 	abort ();
   1300      1.1  mrg       }
   1301      1.1  mrg   }
   1302      1.1  mrg 
   1303      1.1  mrg   /* Return a descriptive string for K.  */
   1304      1.1  mrg   const char *to_str (kind k)
   1305      1.1  mrg   {
   1306      1.1  mrg     switch (k)
   1307      1.1  mrg       {
   1308      1.1  mrg       case kind::LRE:
   1309      1.1  mrg 	return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
   1310      1.1  mrg       case kind::RLE:
   1311      1.1  mrg 	return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
   1312      1.1  mrg       case kind::LRO:
   1313      1.1  mrg 	return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
   1314      1.1  mrg       case kind::RLO:
   1315      1.1  mrg 	return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
   1316      1.1  mrg       case kind::LRI:
   1317      1.1  mrg 	return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
   1318      1.1  mrg       case kind::RLI:
   1319      1.1  mrg 	return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
   1320      1.1  mrg       case kind::FSI:
   1321      1.1  mrg 	return "U+2068 (FIRST STRONG ISOLATE)";
   1322      1.1  mrg       case kind::PDF:
   1323      1.1  mrg 	return "U+202C (POP DIRECTIONAL FORMATTING)";
   1324      1.1  mrg       case kind::PDI:
   1325      1.1  mrg 	return "U+2069 (POP DIRECTIONAL ISOLATE)";
   1326      1.1  mrg       case kind::LTR:
   1327      1.1  mrg 	return "U+200E (LEFT-TO-RIGHT MARK)";
   1328      1.1  mrg       case kind::RTL:
   1329      1.1  mrg 	return "U+200F (RIGHT-TO-LEFT MARK)";
   1330      1.1  mrg       default:
   1331      1.1  mrg 	abort ();
   1332      1.1  mrg       }
   1333      1.1  mrg   }
   1334      1.1  mrg }
   1335      1.1  mrg 
   1336      1.1  mrg /* Get location_t for the range of bytes [START, START + NUM_BYTES)
   1337      1.1  mrg    within the current line in FILE, with the caret at START.  */
   1338      1.1  mrg 
   1339      1.1  mrg static location_t
   1340      1.1  mrg get_location_for_byte_range_in_cur_line (cpp_reader *pfile,
   1341      1.1  mrg 					 const unsigned char *const start,
   1342      1.1  mrg 					 size_t num_bytes)
   1343      1.1  mrg {
   1344      1.1  mrg   gcc_checking_assert (num_bytes > 0);
   1345      1.1  mrg 
   1346      1.1  mrg   /* CPP_BUF_COLUMN and linemap_position_for_column both refer
   1347      1.1  mrg      to offsets in bytes, but CPP_BUF_COLUMN is 0-based,
   1348      1.1  mrg      whereas linemap_position_for_column is 1-based.  */
   1349      1.1  mrg 
   1350      1.1  mrg   /* Get 0-based offsets within the line.  */
   1351      1.1  mrg   size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start);
   1352      1.1  mrg   size_t end_offset = start_offset + num_bytes - 1;
   1353      1.1  mrg 
   1354      1.1  mrg   /* Now convert to location_t, where "columns" are 1-based byte offsets.  */
   1355      1.1  mrg   location_t start_loc = linemap_position_for_column (pfile->line_table,
   1356      1.1  mrg 						      start_offset + 1);
   1357      1.1  mrg   location_t end_loc = linemap_position_for_column (pfile->line_table,
   1358      1.1  mrg 						     end_offset + 1);
   1359      1.1  mrg 
   1360      1.1  mrg   if (start_loc == end_loc)
   1361      1.1  mrg     return start_loc;
   1362      1.1  mrg 
   1363      1.1  mrg   source_range src_range;
   1364      1.1  mrg   src_range.m_start = start_loc;
   1365  1.1.1.3  mrg   src_range.m_finish = end_loc;
   1366  1.1.1.3  mrg   location_t combined_loc
   1367  1.1.1.3  mrg     = pfile->line_table->get_or_create_combined_loc (start_loc,
   1368  1.1.1.3  mrg 						     src_range,
   1369  1.1.1.3  mrg 						     nullptr,
   1370      1.1  mrg 						     0);
   1371      1.1  mrg   return combined_loc;
   1372      1.1  mrg }
   1373      1.1  mrg 
   1374      1.1  mrg /* Parse a sequence of 3 bytes starting with P and return its bidi code.  */
   1375      1.1  mrg 
   1376      1.1  mrg static bidi::kind
   1377      1.1  mrg get_bidi_utf8_1 (const unsigned char *const p)
   1378      1.1  mrg {
   1379      1.1  mrg   gcc_checking_assert (p[0] == bidi::utf8_start);
   1380      1.1  mrg 
   1381      1.1  mrg   if (p[1] == 0x80)
   1382      1.1  mrg     switch (p[2])
   1383      1.1  mrg       {
   1384      1.1  mrg       case 0xaa:
   1385      1.1  mrg 	return bidi::kind::LRE;
   1386      1.1  mrg       case 0xab:
   1387      1.1  mrg 	return bidi::kind::RLE;
   1388      1.1  mrg       case 0xac:
   1389      1.1  mrg 	return bidi::kind::PDF;
   1390      1.1  mrg       case 0xad:
   1391      1.1  mrg 	return bidi::kind::LRO;
   1392      1.1  mrg       case 0xae:
   1393      1.1  mrg 	return bidi::kind::RLO;
   1394      1.1  mrg       case 0x8e:
   1395      1.1  mrg 	return bidi::kind::LTR;
   1396      1.1  mrg       case 0x8f:
   1397      1.1  mrg 	return bidi::kind::RTL;
   1398      1.1  mrg       default:
   1399      1.1  mrg 	break;
   1400      1.1  mrg       }
   1401      1.1  mrg   else if (p[1] == 0x81)
   1402      1.1  mrg     switch (p[2])
   1403      1.1  mrg       {
   1404      1.1  mrg       case 0xa6:
   1405      1.1  mrg 	return bidi::kind::LRI;
   1406      1.1  mrg       case 0xa7:
   1407      1.1  mrg 	return bidi::kind::RLI;
   1408      1.1  mrg       case 0xa8:
   1409      1.1  mrg 	return bidi::kind::FSI;
   1410      1.1  mrg       case 0xa9:
   1411      1.1  mrg 	return bidi::kind::PDI;
   1412      1.1  mrg       default:
   1413      1.1  mrg 	break;
   1414      1.1  mrg       }
   1415      1.1  mrg 
   1416      1.1  mrg   return bidi::kind::NONE;
   1417      1.1  mrg }
   1418      1.1  mrg 
   1419      1.1  mrg /* Parse a sequence of 3 bytes starting with P and return its bidi code.
   1420      1.1  mrg    If the kind is not NONE, write the location to *OUT.*/
   1421      1.1  mrg 
   1422      1.1  mrg static bidi::kind
   1423      1.1  mrg get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
   1424      1.1  mrg {
   1425      1.1  mrg   bidi::kind result = get_bidi_utf8_1 (p);
   1426      1.1  mrg   if (result != bidi::kind::NONE)
   1427      1.1  mrg     {
   1428      1.1  mrg       /* We have a sequence of 3 bytes starting at P.  */
   1429      1.1  mrg       *out = get_location_for_byte_range_in_cur_line (pfile, p, 3);
   1430      1.1  mrg     }
   1431      1.1  mrg   return result;
   1432      1.1  mrg }
   1433      1.1  mrg 
   1434      1.1  mrg /* Parse a UCN where P points just past \u or \U and return its bidi code.  */
   1435      1.1  mrg 
   1436  1.1.1.3  mrg static bidi::kind
   1437      1.1  mrg get_bidi_ucn_1 (const unsigned char *p, bool is_U, const unsigned char **end)
   1438      1.1  mrg {
   1439      1.1  mrg   /* 6.4.3 Universal Character Names
   1440      1.1  mrg       \u hex-quad
   1441  1.1.1.3  mrg       \U hex-quad hex-quad
   1442      1.1  mrg       \u { simple-hexadecimal-digit-sequence }
   1443      1.1  mrg      where \unnnn means \U0000nnnn.  */
   1444  1.1.1.3  mrg 
   1445      1.1  mrg   *end = p + 4;
   1446      1.1  mrg   if (is_U)
   1447      1.1  mrg     {
   1448      1.1  mrg       if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
   1449      1.1  mrg 	return bidi::kind::NONE;
   1450      1.1  mrg       /* Skip 4B so we can treat \u and \U the same below.  */
   1451  1.1.1.3  mrg       p += 4;
   1452  1.1.1.3  mrg       *end += 4;
   1453  1.1.1.3  mrg     }
   1454  1.1.1.3  mrg   else if (p[0] == '{')
   1455  1.1.1.3  mrg     {
   1456  1.1.1.3  mrg       p++;
   1457  1.1.1.3  mrg       while (*p == '0')
   1458  1.1.1.3  mrg 	p++;
   1459  1.1.1.3  mrg       if (p[0] != '2'
   1460  1.1.1.3  mrg 	  || p[1] != '0'
   1461  1.1.1.3  mrg 	  || !ISXDIGIT (p[2])
   1462  1.1.1.3  mrg 	  || !ISXDIGIT (p[3])
   1463  1.1.1.3  mrg 	  || p[4] != '}')
   1464  1.1.1.3  mrg 	return bidi::kind::NONE;
   1465      1.1  mrg       *end = p + 5;
   1466      1.1  mrg     }
   1467      1.1  mrg 
   1468      1.1  mrg   /* All code points we are looking for start with 20xx.  */
   1469      1.1  mrg   if (p[0] != '2' || p[1] != '0')
   1470      1.1  mrg     return bidi::kind::NONE;
   1471      1.1  mrg   else if (p[2] == '2')
   1472      1.1  mrg     switch (p[3])
   1473      1.1  mrg       {
   1474      1.1  mrg       case 'a':
   1475      1.1  mrg       case 'A':
   1476      1.1  mrg 	return bidi::kind::LRE;
   1477      1.1  mrg       case 'b':
   1478      1.1  mrg       case 'B':
   1479      1.1  mrg 	return bidi::kind::RLE;
   1480      1.1  mrg       case 'c':
   1481      1.1  mrg       case 'C':
   1482      1.1  mrg 	return bidi::kind::PDF;
   1483      1.1  mrg       case 'd':
   1484      1.1  mrg       case 'D':
   1485      1.1  mrg 	return bidi::kind::LRO;
   1486      1.1  mrg       case 'e':
   1487      1.1  mrg       case 'E':
   1488      1.1  mrg 	return bidi::kind::RLO;
   1489      1.1  mrg       default:
   1490      1.1  mrg 	break;
   1491      1.1  mrg       }
   1492      1.1  mrg   else if (p[2] == '6')
   1493      1.1  mrg     switch (p[3])
   1494      1.1  mrg       {
   1495      1.1  mrg       case '6':
   1496      1.1  mrg 	return bidi::kind::LRI;
   1497      1.1  mrg       case '7':
   1498      1.1  mrg 	return bidi::kind::RLI;
   1499      1.1  mrg       case '8':
   1500      1.1  mrg 	return bidi::kind::FSI;
   1501      1.1  mrg       case '9':
   1502      1.1  mrg 	return bidi::kind::PDI;
   1503      1.1  mrg       default:
   1504      1.1  mrg 	break;
   1505      1.1  mrg       }
   1506      1.1  mrg   else if (p[2] == '0')
   1507      1.1  mrg     switch (p[3])
   1508      1.1  mrg       {
   1509      1.1  mrg       case 'e':
   1510      1.1  mrg       case 'E':
   1511      1.1  mrg 	return bidi::kind::LTR;
   1512      1.1  mrg       case 'f':
   1513      1.1  mrg       case 'F':
   1514      1.1  mrg 	return bidi::kind::RTL;
   1515      1.1  mrg       default:
   1516      1.1  mrg 	break;
   1517      1.1  mrg       }
   1518      1.1  mrg 
   1519      1.1  mrg   return bidi::kind::NONE;
   1520      1.1  mrg }
   1521      1.1  mrg 
   1522  1.1.1.3  mrg /* Parse a UCN where P points just past \u or \U and return its bidi code.
   1523      1.1  mrg    If the kind is not NONE, write the location to *OUT.  */
   1524      1.1  mrg 
   1525  1.1.1.3  mrg static bidi::kind
   1526      1.1  mrg get_bidi_ucn (cpp_reader *pfile, const unsigned char *p, bool is_U,
   1527      1.1  mrg 	      location_t *out)
   1528  1.1.1.3  mrg {
   1529  1.1.1.3  mrg   const unsigned char *end;
   1530      1.1  mrg   bidi::kind result = get_bidi_ucn_1 (p, is_U, &end);
   1531      1.1  mrg   if (result != bidi::kind::NONE)
   1532      1.1  mrg     {
   1533  1.1.1.3  mrg       const unsigned char *start = p - 2;
   1534      1.1  mrg       size_t num_bytes = end - start;
   1535      1.1  mrg       *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
   1536      1.1  mrg     }
   1537      1.1  mrg   return result;
   1538      1.1  mrg }
   1539  1.1.1.3  mrg 
   1540  1.1.1.3  mrg /* Parse a named universal character escape where P points just past \N and
   1541  1.1.1.3  mrg    return its bidi code.  If the kind is not NONE, write the location to
   1542  1.1.1.3  mrg    *OUT.  */
   1543  1.1.1.3  mrg 
   1544  1.1.1.3  mrg static bidi::kind
   1545  1.1.1.3  mrg get_bidi_named (cpp_reader *pfile, const unsigned char *p, location_t *out)
   1546  1.1.1.3  mrg {
   1547  1.1.1.3  mrg   bidi::kind result = bidi::kind::NONE;
   1548  1.1.1.3  mrg   if (*p != '{')
   1549  1.1.1.3  mrg     return bidi::kind::NONE;
   1550  1.1.1.3  mrg   if (strncmp ((const char *) (p + 1), "LEFT-TO-RIGHT ", 14) == 0)
   1551  1.1.1.3  mrg     {
   1552  1.1.1.3  mrg       if (strncmp ((const char *) (p + 15), "MARK}", 5) == 0)
   1553  1.1.1.3  mrg 	result = bidi::kind::LTR;
   1554  1.1.1.3  mrg       else if (strncmp ((const char *) (p + 15), "EMBEDDING}", 10) == 0)
   1555  1.1.1.3  mrg 	result = bidi::kind::LRE;
   1556  1.1.1.3  mrg       else if (strncmp ((const char *) (p + 15), "OVERRIDE}", 9) == 0)
   1557  1.1.1.3  mrg 	result = bidi::kind::LRO;
   1558  1.1.1.3  mrg       else if (strncmp ((const char *) (p + 15), "ISOLATE}", 8) == 0)
   1559  1.1.1.3  mrg 	result = bidi::kind::LRI;
   1560  1.1.1.3  mrg     }
   1561  1.1.1.3  mrg   else if (strncmp ((const char *) (p + 1), "RIGHT-TO-LEFT ", 14) == 0)
   1562  1.1.1.3  mrg     {
   1563  1.1.1.3  mrg       if (strncmp ((const char *) (p + 15), "MARK}", 5) == 0)
   1564  1.1.1.3  mrg 	result = bidi::kind::RTL;
   1565  1.1.1.3  mrg       else if (strncmp ((const char *) (p + 15), "EMBEDDING}", 10) == 0)
   1566  1.1.1.3  mrg 	result = bidi::kind::RLE;
   1567  1.1.1.3  mrg       else if (strncmp ((const char *) (p + 15), "OVERRIDE}", 9) == 0)
   1568  1.1.1.3  mrg 	result = bidi::kind::RLO;
   1569  1.1.1.3  mrg       else if (strncmp ((const char *) (p + 15), "ISOLATE}", 8) == 0)
   1570  1.1.1.3  mrg 	result = bidi::kind::RLI;
   1571  1.1.1.3  mrg     }
   1572  1.1.1.3  mrg   else if (strncmp ((const char *) (p + 1), "POP DIRECTIONAL ", 16) == 0)
   1573  1.1.1.3  mrg     {
   1574  1.1.1.3  mrg       if (strncmp ((const char *) (p + 16), "FORMATTING}", 11) == 0)
   1575  1.1.1.3  mrg 	result = bidi::kind::PDF;
   1576  1.1.1.3  mrg       else if (strncmp ((const char *) (p + 16), "ISOLATE}", 8) == 0)
   1577  1.1.1.3  mrg 	result = bidi::kind::PDI;
   1578  1.1.1.3  mrg     }
   1579  1.1.1.3  mrg   else if (strncmp ((const char *) (p + 1), "FIRST STRONG ISOLATE}", 21) == 0)
   1580  1.1.1.3  mrg     result = bidi::kind::FSI;
   1581  1.1.1.3  mrg   if (result != bidi::kind::NONE)
   1582  1.1.1.3  mrg     *out = get_location_for_byte_range_in_cur_line (pfile, p - 2,
   1583  1.1.1.3  mrg 						    (strchr ((const char *)
   1584  1.1.1.3  mrg 							     (p + 1), '}')
   1585  1.1.1.3  mrg 						     - (const char *) p)
   1586  1.1.1.3  mrg 						    + 3);
   1587  1.1.1.3  mrg   return result;
   1588  1.1.1.3  mrg }
   1589      1.1  mrg 
   1590      1.1  mrg /* Subclass of rich_location for reporting on unpaired UTF-8
   1591      1.1  mrg    bidirectional control character(s).
   1592      1.1  mrg    Escape the source lines on output, and show all unclosed
   1593      1.1  mrg    bidi context, labelling everything.  */
   1594      1.1  mrg 
   1595      1.1  mrg class unpaired_bidi_rich_location : public rich_location
   1596      1.1  mrg {
   1597      1.1  mrg  public:
   1598      1.1  mrg   class custom_range_label : public range_label
   1599      1.1  mrg   {
   1600  1.1.1.3  mrg    public:
   1601      1.1  mrg      label_text get_text (unsigned range_idx) const final override
   1602      1.1  mrg      {
   1603      1.1  mrg        /* range 0 is the primary location; each subsequent range i + 1
   1604      1.1  mrg 	  is for bidi::vec[i].  */
   1605      1.1  mrg        if (range_idx > 0)
   1606      1.1  mrg 	 {
   1607      1.1  mrg 	   const bidi::context &ctxt (bidi::vec[range_idx - 1]);
   1608      1.1  mrg 	   return label_text::borrow (bidi::to_str (ctxt.m_kind));
   1609      1.1  mrg 	 }
   1610      1.1  mrg        else
   1611      1.1  mrg 	 return label_text::borrow (_("end of bidirectional context"));
   1612      1.1  mrg      }
   1613      1.1  mrg   };
   1614      1.1  mrg 
   1615      1.1  mrg   unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc)
   1616      1.1  mrg   : rich_location (pfile->line_table, loc, &m_custom_label)
   1617      1.1  mrg   {
   1618      1.1  mrg     set_escape_on_output (true);
   1619      1.1  mrg     for (unsigned i = 0; i < bidi::vec.count (); i++)
   1620      1.1  mrg       add_range (bidi::vec[i].m_loc,
   1621      1.1  mrg 		 SHOW_RANGE_WITHOUT_CARET,
   1622      1.1  mrg 		 &m_custom_label);
   1623      1.1  mrg   }
   1624      1.1  mrg 
   1625      1.1  mrg  private:
   1626      1.1  mrg    custom_range_label m_custom_label;
   1627      1.1  mrg };
   1628      1.1  mrg 
   1629      1.1  mrg /* We're closing a bidi context, that is, we've encountered a newline,
   1630      1.1  mrg    are closing a C-style comment, or are at the end of a string literal,
   1631      1.1  mrg    character constant, or identifier.  Warn if this context was not
   1632      1.1  mrg    properly terminated by a PDI or PDF.  P points to the last character
   1633      1.1  mrg    in this context.  */
   1634      1.1  mrg 
   1635      1.1  mrg static void
   1636      1.1  mrg maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
   1637      1.1  mrg {
   1638      1.1  mrg   const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
   1639      1.1  mrg   if (bidi::vec.count () > 0
   1640      1.1  mrg       && (warn_bidi & bidirectional_unpaired
   1641      1.1  mrg 	  && (!bidi::current_ctx_ucn_p ()
   1642      1.1  mrg 	      || (warn_bidi & bidirectional_ucn))))
   1643      1.1  mrg     {
   1644      1.1  mrg       const location_t loc
   1645      1.1  mrg 	= linemap_position_for_column (pfile->line_table,
   1646      1.1  mrg 				       CPP_BUF_COLUMN (pfile->buffer, p));
   1647      1.1  mrg       unpaired_bidi_rich_location rich_loc (pfile, loc);
   1648      1.1  mrg       /* cpp_callbacks doesn't yet have a way to handle singular vs plural
   1649      1.1  mrg 	 forms of a diagnostic, so fake it for now.  */
   1650      1.1  mrg       if (bidi::vec.count () > 1)
   1651      1.1  mrg 	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
   1652      1.1  mrg 			"unpaired UTF-8 bidirectional control characters "
   1653      1.1  mrg 			"detected");
   1654      1.1  mrg       else
   1655      1.1  mrg 	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
   1656      1.1  mrg 			"unpaired UTF-8 bidirectional control character "
   1657      1.1  mrg 			"detected");
   1658      1.1  mrg     }
   1659      1.1  mrg   /* We're done with this context.  */
   1660      1.1  mrg   bidi::on_close ();
   1661      1.1  mrg }
   1662      1.1  mrg 
   1663      1.1  mrg /* We're at the beginning or in the middle of an identifier/comment/string
   1664      1.1  mrg    literal/character constant.  Warn if we've encountered a bidi character.
   1665      1.1  mrg    KIND says which bidi control character it was; UCN_P is true iff this bidi
   1666      1.1  mrg    control character was written as a UCN.  LOC is the location of the
   1667      1.1  mrg    character, but is only valid if KIND != bidi::kind::NONE.  */
   1668      1.1  mrg 
   1669      1.1  mrg static void
   1670      1.1  mrg maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind,
   1671      1.1  mrg 			 bool ucn_p, location_t loc)
   1672      1.1  mrg {
   1673      1.1  mrg   if (__builtin_expect (kind == bidi::kind::NONE, 1))
   1674      1.1  mrg     return;
   1675      1.1  mrg 
   1676      1.1  mrg   const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
   1677      1.1  mrg 
   1678      1.1  mrg   if (warn_bidi & (bidirectional_unpaired|bidirectional_any))
   1679      1.1  mrg     {
   1680      1.1  mrg       rich_location rich_loc (pfile->line_table, loc);
   1681      1.1  mrg       rich_loc.set_escape_on_output (true);
   1682      1.1  mrg 
   1683      1.1  mrg       /* It seems excessive to warn about a PDI/PDF that is closing
   1684      1.1  mrg 	 an opened context because we've already warned about the
   1685      1.1  mrg 	 opening character.  Except warn when we have a UCN x UTF-8
   1686      1.1  mrg 	 mismatch, if UCN checking is enabled.  */
   1687      1.1  mrg       if (kind == bidi::current_ctx ())
   1688      1.1  mrg 	{
   1689      1.1  mrg 	  if (warn_bidi == (bidirectional_unpaired|bidirectional_ucn)
   1690      1.1  mrg 	      && bidi::current_ctx_ucn_p () != ucn_p)
   1691      1.1  mrg 	    {
   1692      1.1  mrg 	      rich_loc.add_range (bidi::current_ctx_loc ());
   1693      1.1  mrg 	      cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
   1694      1.1  mrg 			      "UTF-8 vs UCN mismatch when closing "
   1695      1.1  mrg 			      "a context by \"%s\"", bidi::to_str (kind));
   1696      1.1  mrg 	    }
   1697      1.1  mrg 	}
   1698      1.1  mrg       else if (warn_bidi & bidirectional_any
   1699      1.1  mrg 	       && (!ucn_p || (warn_bidi & bidirectional_ucn)))
   1700      1.1  mrg 	{
   1701      1.1  mrg 	  if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
   1702      1.1  mrg 	    cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
   1703      1.1  mrg 			    "\"%s\" is closing an unopened context",
   1704      1.1  mrg 			    bidi::to_str (kind));
   1705      1.1  mrg 	  else
   1706      1.1  mrg 	    cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
   1707      1.1  mrg 			    "found problematic Unicode character \"%s\"",
   1708      1.1  mrg 			    bidi::to_str (kind));
   1709      1.1  mrg 	}
   1710      1.1  mrg     }
   1711      1.1  mrg   /* We're done with this context.  */
   1712      1.1  mrg   bidi::on_char (kind, ucn_p, loc);
   1713      1.1  mrg }
   1714  1.1.1.3  mrg 
   1715  1.1.1.3  mrg static const cppchar_t utf8_continuation = 0x80;
   1716  1.1.1.3  mrg static const cppchar_t utf8_signifier = 0xC0;
   1717  1.1.1.3  mrg 
   1718  1.1.1.3  mrg /* Emit -Winvalid-utf8 warning on invalid UTF-8 character starting
   1719  1.1.1.3  mrg    at PFILE->buffer->cur.  Return a pointer after the diagnosed
   1720  1.1.1.3  mrg    invalid character.  */
   1721  1.1.1.3  mrg 
   1722  1.1.1.3  mrg static const uchar *
   1723  1.1.1.3  mrg _cpp_warn_invalid_utf8 (cpp_reader *pfile)
   1724  1.1.1.3  mrg {
   1725  1.1.1.3  mrg   cpp_buffer *buffer = pfile->buffer;
   1726  1.1.1.3  mrg   const uchar *cur = buffer->cur;
   1727  1.1.1.3  mrg   bool pedantic = (CPP_PEDANTIC (pfile)
   1728  1.1.1.3  mrg 		   && CPP_OPTION (pfile, cpp_warn_invalid_utf8) == 2);
   1729  1.1.1.3  mrg 
   1730  1.1.1.3  mrg   if (cur[0] < utf8_signifier
   1731  1.1.1.3  mrg       || cur[1] < utf8_continuation || cur[1] >= utf8_signifier)
   1732  1.1.1.3  mrg     {
   1733  1.1.1.3  mrg       if (pedantic)
   1734  1.1.1.3  mrg 	cpp_error_with_line (pfile, CPP_DL_PEDWARN,
   1735  1.1.1.3  mrg 			     pfile->line_table->highest_line,
   1736  1.1.1.3  mrg 			     CPP_BUF_COL (buffer),
   1737  1.1.1.3  mrg 			     "invalid UTF-8 character <%x>",
   1738  1.1.1.3  mrg 			     cur[0]);
   1739  1.1.1.3  mrg       else
   1740  1.1.1.3  mrg 	cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
   1741  1.1.1.3  mrg 			       pfile->line_table->highest_line,
   1742  1.1.1.3  mrg 			       CPP_BUF_COL (buffer),
   1743  1.1.1.3  mrg 			       "invalid UTF-8 character <%x>",
   1744  1.1.1.3  mrg 			       cur[0]);
   1745  1.1.1.3  mrg       return cur + 1;
   1746  1.1.1.3  mrg     }
   1747  1.1.1.3  mrg   else if (cur[2] < utf8_continuation || cur[2] >= utf8_signifier)
   1748  1.1.1.3  mrg     {
   1749  1.1.1.3  mrg       if (pedantic)
   1750  1.1.1.3  mrg 	cpp_error_with_line (pfile, CPP_DL_PEDWARN,
   1751  1.1.1.3  mrg 			     pfile->line_table->highest_line,
   1752  1.1.1.3  mrg 			     CPP_BUF_COL (buffer),
   1753  1.1.1.3  mrg 			     "invalid UTF-8 character <%x><%x>",
   1754  1.1.1.3  mrg 			     cur[0], cur[1]);
   1755  1.1.1.3  mrg       else
   1756  1.1.1.3  mrg 	cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
   1757  1.1.1.3  mrg 			       pfile->line_table->highest_line,
   1758  1.1.1.3  mrg 			       CPP_BUF_COL (buffer),
   1759  1.1.1.3  mrg 			       "invalid UTF-8 character <%x><%x>",
   1760  1.1.1.3  mrg 			       cur[0], cur[1]);
   1761  1.1.1.3  mrg       return cur + 2;
   1762  1.1.1.3  mrg     }
   1763  1.1.1.3  mrg   else if (cur[3] < utf8_continuation || cur[3] >= utf8_signifier)
   1764  1.1.1.3  mrg     {
   1765  1.1.1.3  mrg       if (pedantic)
   1766  1.1.1.3  mrg 	cpp_error_with_line (pfile, CPP_DL_PEDWARN,
   1767  1.1.1.3  mrg 			     pfile->line_table->highest_line,
   1768  1.1.1.3  mrg 			     CPP_BUF_COL (buffer),
   1769  1.1.1.3  mrg 			     "invalid UTF-8 character <%x><%x><%x>",
   1770  1.1.1.3  mrg 			     cur[0], cur[1], cur[2]);
   1771  1.1.1.3  mrg       else
   1772  1.1.1.3  mrg 	cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
   1773  1.1.1.3  mrg 			       pfile->line_table->highest_line,
   1774  1.1.1.3  mrg 			       CPP_BUF_COL (buffer),
   1775  1.1.1.3  mrg 			       "invalid UTF-8 character <%x><%x><%x>",
   1776  1.1.1.3  mrg 			       cur[0], cur[1], cur[2]);
   1777  1.1.1.3  mrg       return cur + 3;
   1778  1.1.1.3  mrg     }
   1779  1.1.1.3  mrg   else
   1780  1.1.1.3  mrg     {
   1781  1.1.1.3  mrg       if (pedantic)
   1782  1.1.1.3  mrg 	cpp_error_with_line (pfile, CPP_DL_PEDWARN,
   1783  1.1.1.3  mrg 			     pfile->line_table->highest_line,
   1784  1.1.1.3  mrg 			     CPP_BUF_COL (buffer),
   1785  1.1.1.3  mrg 			     "invalid UTF-8 character <%x><%x><%x><%x>",
   1786  1.1.1.3  mrg 			     cur[0], cur[1], cur[2], cur[3]);
   1787  1.1.1.3  mrg       else
   1788  1.1.1.3  mrg 	cpp_warning_with_line (pfile, CPP_W_INVALID_UTF8,
   1789  1.1.1.3  mrg 			       pfile->line_table->highest_line,
   1790  1.1.1.3  mrg 			       CPP_BUF_COL (buffer),
   1791  1.1.1.3  mrg 			       "invalid UTF-8 character <%x><%x><%x><%x>",
   1792  1.1.1.3  mrg 			       cur[0], cur[1], cur[2], cur[3]);
   1793  1.1.1.3  mrg       return cur + 4;
   1794  1.1.1.3  mrg     }
   1795  1.1.1.3  mrg }
   1796  1.1.1.3  mrg 
   1797  1.1.1.3  mrg /* Helper function of *skip_*_comment and lex*_string.  For C,
   1798  1.1.1.3  mrg    character at CUR[-1] with MSB set handle -Wbidi-chars* and
   1799  1.1.1.3  mrg    -Winvalid-utf8 diagnostics and return pointer to first character
   1800  1.1.1.3  mrg    that should be processed next.  */
   1801  1.1.1.3  mrg 
   1802  1.1.1.3  mrg static inline const uchar *
   1803  1.1.1.3  mrg _cpp_handle_multibyte_utf8 (cpp_reader *pfile, uchar c,
   1804  1.1.1.3  mrg 			    const uchar *cur, bool warn_bidi_p,
   1805  1.1.1.3  mrg 			    bool warn_invalid_utf8_p)
   1806  1.1.1.3  mrg {
   1807  1.1.1.3  mrg   /* If this is a beginning of a UTF-8 encoding, it might be
   1808  1.1.1.3  mrg      a bidirectional control character.  */
   1809  1.1.1.3  mrg   if (c == bidi::utf8_start && warn_bidi_p)
   1810  1.1.1.3  mrg     {
   1811  1.1.1.3  mrg       location_t loc;
   1812  1.1.1.3  mrg       bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
   1813  1.1.1.3  mrg       maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
   1814  1.1.1.3  mrg     }
   1815  1.1.1.3  mrg   if (!warn_invalid_utf8_p)
   1816  1.1.1.3  mrg     return cur;
   1817  1.1.1.3  mrg   if (c >= utf8_signifier)
   1818  1.1.1.3  mrg     {
   1819  1.1.1.3  mrg       cppchar_t s;
   1820  1.1.1.3  mrg       const uchar *pstr = cur - 1;
   1821  1.1.1.3  mrg       if (_cpp_valid_utf8 (pfile, &pstr, pfile->buffer->rlimit, 0, NULL, &s)
   1822  1.1.1.3  mrg 	  && s <= UCS_LIMIT)
   1823  1.1.1.3  mrg 	return pstr;
   1824  1.1.1.3  mrg     }
   1825  1.1.1.3  mrg   pfile->buffer->cur = cur - 1;
   1826  1.1.1.3  mrg   return _cpp_warn_invalid_utf8 (pfile);
   1827  1.1.1.3  mrg }
   1828      1.1  mrg 
   1829      1.1  mrg /* Skip a C-style block comment.  We find the end of the comment by
   1830      1.1  mrg    seeing if an asterisk is before every '/' we encounter.  Returns
   1831      1.1  mrg    nonzero if comment terminated by EOF, zero otherwise.
   1832      1.1  mrg 
   1833      1.1  mrg    Buffer->cur points to the initial asterisk of the comment.  */
   1834      1.1  mrg bool
   1835      1.1  mrg _cpp_skip_block_comment (cpp_reader *pfile)
   1836      1.1  mrg {
   1837      1.1  mrg   cpp_buffer *buffer = pfile->buffer;
   1838      1.1  mrg   const uchar *cur = buffer->cur;
   1839      1.1  mrg   uchar c;
   1840  1.1.1.3  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   1841  1.1.1.3  mrg   const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
   1842      1.1  mrg   const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
   1843      1.1  mrg 
   1844      1.1  mrg   cur++;
   1845      1.1  mrg   if (*cur == '/')
   1846      1.1  mrg     cur++;
   1847      1.1  mrg 
   1848      1.1  mrg   for (;;)
   1849      1.1  mrg     {
   1850      1.1  mrg       /* People like decorating comments with '*', so check for '/'
   1851      1.1  mrg 	 instead for efficiency.  */
   1852      1.1  mrg       c = *cur++;
   1853      1.1  mrg 
   1854      1.1  mrg       if (c == '/')
   1855      1.1  mrg 	{
   1856      1.1  mrg 	  if (cur[-2] == '*')
   1857      1.1  mrg 	    {
   1858      1.1  mrg 	      if (warn_bidi_p)
   1859      1.1  mrg 		maybe_warn_bidi_on_close (pfile, cur);
   1860      1.1  mrg 	      break;
   1861      1.1  mrg 	    }
   1862      1.1  mrg 
   1863      1.1  mrg 	  /* Warn about potential nested comments, but not if the '/'
   1864      1.1  mrg 	     comes immediately before the true comment delimiter.
   1865      1.1  mrg 	     Don't bother to get it right across escaped newlines.  */
   1866      1.1  mrg 	  if (CPP_OPTION (pfile, warn_comments)
   1867      1.1  mrg 	      && cur[0] == '*' && cur[1] != '/')
   1868      1.1  mrg 	    {
   1869      1.1  mrg 	      buffer->cur = cur;
   1870      1.1  mrg 	      cpp_warning_with_line (pfile, CPP_W_COMMENTS,
   1871      1.1  mrg 				     pfile->line_table->highest_line,
   1872      1.1  mrg 				     CPP_BUF_COL (buffer),
   1873      1.1  mrg 				     "\"/*\" within comment");
   1874      1.1  mrg 	    }
   1875      1.1  mrg 	}
   1876      1.1  mrg       else if (c == '\n')
   1877      1.1  mrg 	{
   1878      1.1  mrg 	  unsigned int cols;
   1879      1.1  mrg 	  buffer->cur = cur - 1;
   1880      1.1  mrg 	  if (warn_bidi_p)
   1881      1.1  mrg 	    maybe_warn_bidi_on_close (pfile, cur);
   1882      1.1  mrg 	  _cpp_process_line_notes (pfile, true);
   1883      1.1  mrg 	  if (buffer->next_line >= buffer->rlimit)
   1884      1.1  mrg 	    return true;
   1885      1.1  mrg 	  _cpp_clean_line (pfile);
   1886      1.1  mrg 
   1887      1.1  mrg 	  cols = buffer->next_line - buffer->line_base;
   1888      1.1  mrg 	  CPP_INCREMENT_LINE (pfile, cols);
   1889      1.1  mrg 
   1890      1.1  mrg 	  cur = buffer->cur;
   1891  1.1.1.3  mrg 	}
   1892  1.1.1.3  mrg       else if (__builtin_expect (c >= utf8_continuation, 0)
   1893  1.1.1.3  mrg 	       && warn_bidi_or_invalid_utf8_p)
   1894  1.1.1.3  mrg 	cur = _cpp_handle_multibyte_utf8 (pfile, c, cur, warn_bidi_p,
   1895      1.1  mrg 					  warn_invalid_utf8_p);
   1896      1.1  mrg     }
   1897      1.1  mrg 
   1898      1.1  mrg   buffer->cur = cur;
   1899      1.1  mrg   _cpp_process_line_notes (pfile, true);
   1900      1.1  mrg   return false;
   1901      1.1  mrg }
   1902      1.1  mrg 
   1903      1.1  mrg /* Skip a C++ line comment, leaving buffer->cur pointing to the
   1904      1.1  mrg    terminating newline.  Handles escaped newlines.  Returns nonzero
   1905      1.1  mrg    if a multiline comment.  */
   1906      1.1  mrg static int
   1907      1.1  mrg skip_line_comment (cpp_reader *pfile)
   1908      1.1  mrg {
   1909      1.1  mrg   cpp_buffer *buffer = pfile->buffer;
   1910      1.1  mrg   location_t orig_line = pfile->line_table->highest_line;
   1911  1.1.1.3  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   1912  1.1.1.3  mrg   const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
   1913      1.1  mrg   const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
   1914  1.1.1.3  mrg 
   1915      1.1  mrg   if (!warn_bidi_or_invalid_utf8_p)
   1916      1.1  mrg     while (*buffer->cur != '\n')
   1917  1.1.1.3  mrg       buffer->cur++;
   1918      1.1  mrg   else if (!warn_invalid_utf8_p)
   1919      1.1  mrg     {
   1920      1.1  mrg       while (*buffer->cur != '\n'
   1921      1.1  mrg 	     && *buffer->cur != bidi::utf8_start)
   1922      1.1  mrg 	buffer->cur++;
   1923      1.1  mrg       if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
   1924      1.1  mrg 	{
   1925      1.1  mrg 	  while (*buffer->cur != '\n')
   1926      1.1  mrg 	    {
   1927      1.1  mrg 	      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
   1928      1.1  mrg 		{
   1929      1.1  mrg 		  location_t loc;
   1930      1.1  mrg 		  bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
   1931      1.1  mrg 		  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
   1932      1.1  mrg 		}
   1933      1.1  mrg 	      buffer->cur++;
   1934      1.1  mrg 	    }
   1935      1.1  mrg 	  maybe_warn_bidi_on_close (pfile, buffer->cur);
   1936      1.1  mrg 	}
   1937  1.1.1.3  mrg     }
   1938  1.1.1.3  mrg   else
   1939  1.1.1.3  mrg     {
   1940  1.1.1.3  mrg       while (*buffer->cur != '\n')
   1941  1.1.1.3  mrg 	{
   1942  1.1.1.3  mrg 	  if (*buffer->cur < utf8_continuation)
   1943  1.1.1.3  mrg 	    {
   1944  1.1.1.3  mrg 	      buffer->cur++;
   1945  1.1.1.3  mrg 	      continue;
   1946  1.1.1.3  mrg 	    }
   1947  1.1.1.3  mrg 	  buffer->cur
   1948  1.1.1.3  mrg 	    = _cpp_handle_multibyte_utf8 (pfile, *buffer->cur, buffer->cur + 1,
   1949  1.1.1.3  mrg 					  warn_bidi_p, warn_invalid_utf8_p);
   1950  1.1.1.3  mrg 	}
   1951  1.1.1.3  mrg       if (warn_bidi_p)
   1952  1.1.1.3  mrg 	maybe_warn_bidi_on_close (pfile, buffer->cur);
   1953      1.1  mrg     }
   1954      1.1  mrg 
   1955      1.1  mrg   _cpp_process_line_notes (pfile, true);
   1956      1.1  mrg   return orig_line != pfile->line_table->highest_line;
   1957      1.1  mrg }
   1958      1.1  mrg 
   1959      1.1  mrg /* Skips whitespace, saving the next non-whitespace character.  */
   1960      1.1  mrg static void
   1961      1.1  mrg skip_whitespace (cpp_reader *pfile, cppchar_t c)
   1962      1.1  mrg {
   1963      1.1  mrg   cpp_buffer *buffer = pfile->buffer;
   1964      1.1  mrg   bool saw_NUL = false;
   1965      1.1  mrg 
   1966      1.1  mrg   do
   1967      1.1  mrg     {
   1968      1.1  mrg       /* Horizontal space always OK.  */
   1969      1.1  mrg       if (c == ' ' || c == '\t')
   1970      1.1  mrg 	;
   1971      1.1  mrg       /* Just \f \v or \0 left.  */
   1972      1.1  mrg       else if (c == '\0')
   1973      1.1  mrg 	saw_NUL = true;
   1974      1.1  mrg       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
   1975      1.1  mrg 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
   1976      1.1  mrg 			     CPP_BUF_COL (buffer),
   1977      1.1  mrg 			     "%s in preprocessing directive",
   1978      1.1  mrg 			     c == '\f' ? "form feed" : "vertical tab");
   1979      1.1  mrg 
   1980      1.1  mrg       c = *buffer->cur++;
   1981      1.1  mrg     }
   1982      1.1  mrg   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
   1983      1.1  mrg   while (is_nvspace (c));
   1984      1.1  mrg 
   1985      1.1  mrg   if (saw_NUL)
   1986      1.1  mrg     {
   1987      1.1  mrg       encoding_rich_location rich_loc (pfile);
   1988      1.1  mrg       cpp_error_at (pfile, CPP_DL_WARNING, &rich_loc,
   1989      1.1  mrg 		    "null character(s) ignored");
   1990      1.1  mrg     }
   1991      1.1  mrg 
   1992      1.1  mrg   buffer->cur--;
   1993      1.1  mrg }
   1994      1.1  mrg 
   1995      1.1  mrg /* See if the characters of a number token are valid in a name (no
   1996      1.1  mrg    '.', '+' or '-').  */
   1997      1.1  mrg static int
   1998      1.1  mrg name_p (cpp_reader *pfile, const cpp_string *string)
   1999      1.1  mrg {
   2000      1.1  mrg   unsigned int i;
   2001      1.1  mrg 
   2002      1.1  mrg   for (i = 0; i < string->len; i++)
   2003      1.1  mrg     if (!is_idchar (string->text[i]))
   2004      1.1  mrg       return 0;
   2005      1.1  mrg 
   2006      1.1  mrg   return 1;
   2007      1.1  mrg }
   2008      1.1  mrg 
   2009      1.1  mrg /* After parsing an identifier or other sequence, produce a warning about
   2010      1.1  mrg    sequences not in NFC/NFKC.  */
   2011      1.1  mrg static void
   2012      1.1  mrg warn_about_normalization (cpp_reader *pfile,
   2013  1.1.1.3  mrg 			  const cpp_token *token,
   2014  1.1.1.3  mrg 			  const struct normalize_state *s,
   2015      1.1  mrg 			  bool identifier)
   2016      1.1  mrg {
   2017      1.1  mrg   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
   2018      1.1  mrg       && !pfile->state.skipping)
   2019      1.1  mrg     {
   2020      1.1  mrg       location_t loc = token->src_loc;
   2021      1.1  mrg 
   2022      1.1  mrg       /* If possible, create a location range for the token.  */
   2023      1.1  mrg       if (loc >= RESERVED_LOCATION_COUNT
   2024      1.1  mrg 	  && token->type != CPP_EOF
   2025      1.1  mrg 	  /* There must be no line notes to process.  */
   2026      1.1  mrg 	  && (!(pfile->buffer->cur
   2027      1.1  mrg 		>= pfile->buffer->notes[pfile->buffer->cur_note].pos
   2028      1.1  mrg 		&& !pfile->overlaid_buffer)))
   2029      1.1  mrg 	{
   2030      1.1  mrg 	  source_range tok_range;
   2031      1.1  mrg 	  tok_range.m_start = loc;
   2032      1.1  mrg 	  tok_range.m_finish
   2033      1.1  mrg 	    = linemap_position_for_column (pfile->line_table,
   2034      1.1  mrg 					   CPP_BUF_COLUMN (pfile->buffer,
   2035  1.1.1.3  mrg 							   pfile->buffer->cur));
   2036  1.1.1.3  mrg 	  loc = pfile->line_table->get_or_create_combined_loc (loc, tok_range,
   2037      1.1  mrg 							       nullptr, 0);
   2038      1.1  mrg 	}
   2039      1.1  mrg 
   2040      1.1  mrg       encoding_rich_location rich_loc (pfile, loc);
   2041      1.1  mrg 
   2042      1.1  mrg       /* Make sure that the token is printed using UCNs, even
   2043      1.1  mrg 	 if we'd otherwise happily print UTF-8.  */
   2044      1.1  mrg       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
   2045      1.1  mrg       size_t sz;
   2046      1.1  mrg 
   2047      1.1  mrg       sz = cpp_spell_token (pfile, token, buf, false) - buf;
   2048      1.1  mrg       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
   2049      1.1  mrg 	cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
   2050  1.1.1.3  mrg 			"`%.*s' is not in NFKC", (int) sz, buf);
   2051      1.1  mrg       else if (identifier && CPP_OPTION (pfile, xid_identifiers))
   2052      1.1  mrg 	cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
   2053      1.1  mrg 				  "`%.*s' is not in NFC", (int) sz, buf);
   2054      1.1  mrg       else
   2055      1.1  mrg 	cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
   2056      1.1  mrg 			"`%.*s' is not in NFC", (int) sz, buf);
   2057      1.1  mrg       free (buf);
   2058      1.1  mrg     }
   2059      1.1  mrg }
   2060  1.1.1.3  mrg 
   2061  1.1.1.3  mrg /* Returns TRUE if the byte sequence starting at buffer->cur is a valid
   2062  1.1.1.3  mrg    extended character in an identifier.  If FIRST is TRUE, then the character
   2063  1.1.1.3  mrg    must be valid at the beginning of an identifier as well.  If the return
   2064  1.1.1.3  mrg    value is TRUE, then pfile->buffer->cur has been moved to point to the next
   2065      1.1  mrg    byte after the extended character.  */
   2066      1.1  mrg 
   2067      1.1  mrg static bool
   2068      1.1  mrg forms_identifier_p (cpp_reader *pfile, int first,
   2069      1.1  mrg 		    struct normalize_state *state)
   2070      1.1  mrg {
   2071      1.1  mrg   cpp_buffer *buffer = pfile->buffer;
   2072      1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   2073      1.1  mrg 
   2074      1.1  mrg   if (*buffer->cur == '$')
   2075      1.1  mrg     {
   2076      1.1  mrg       if (!CPP_OPTION (pfile, dollars_in_ident))
   2077      1.1  mrg 	return false;
   2078      1.1  mrg 
   2079      1.1  mrg       buffer->cur++;
   2080      1.1  mrg       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
   2081      1.1  mrg 	{
   2082      1.1  mrg 	  CPP_OPTION (pfile, warn_dollars) = 0;
   2083      1.1  mrg 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
   2084      1.1  mrg 	}
   2085      1.1  mrg 
   2086      1.1  mrg       return true;
   2087      1.1  mrg     }
   2088      1.1  mrg 
   2089      1.1  mrg   /* Is this a syntactically valid UCN or a valid UTF-8 char?  */
   2090      1.1  mrg   if (CPP_OPTION (pfile, extended_identifiers))
   2091      1.1  mrg     {
   2092      1.1  mrg       cppchar_t s;
   2093      1.1  mrg       if (*buffer->cur >= utf8_signifier)
   2094      1.1  mrg 	{
   2095      1.1  mrg 	  if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
   2096      1.1  mrg 	      && warn_bidi_p)
   2097      1.1  mrg 	    {
   2098      1.1  mrg 	      location_t loc;
   2099      1.1  mrg 	      bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
   2100      1.1  mrg 	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
   2101      1.1  mrg 	    }
   2102      1.1  mrg 	  if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
   2103      1.1  mrg 			       state, &s))
   2104      1.1  mrg 	    return true;
   2105      1.1  mrg 	}
   2106  1.1.1.3  mrg       else if (*buffer->cur == '\\'
   2107  1.1.1.3  mrg 	       && (buffer->cur[1] == 'u'
   2108  1.1.1.3  mrg 		   || buffer->cur[1] == 'U'
   2109      1.1  mrg 		   || buffer->cur[1] == 'N'))
   2110      1.1  mrg 	{
   2111      1.1  mrg 	  buffer->cur += 2;
   2112      1.1  mrg 	  if (warn_bidi_p)
   2113      1.1  mrg 	    {
   2114  1.1.1.3  mrg 	      location_t loc;
   2115  1.1.1.3  mrg 	      bidi::kind kind;
   2116  1.1.1.3  mrg 	      if (buffer->cur[-1] == 'N')
   2117  1.1.1.3  mrg 		kind = get_bidi_named (pfile, buffer->cur, &loc);
   2118  1.1.1.3  mrg 	      else
   2119  1.1.1.3  mrg 		kind = get_bidi_ucn (pfile, buffer->cur,
   2120      1.1  mrg 				     buffer->cur[-1] == 'U', &loc);
   2121      1.1  mrg 	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
   2122      1.1  mrg 	    }
   2123      1.1  mrg 	  if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
   2124      1.1  mrg 			      state, &s, NULL, NULL))
   2125      1.1  mrg 	    return true;
   2126      1.1  mrg 	  buffer->cur -= 2;
   2127      1.1  mrg 	}
   2128      1.1  mrg     }
   2129      1.1  mrg 
   2130      1.1  mrg   return false;
   2131      1.1  mrg }
   2132      1.1  mrg 
   2133      1.1  mrg /* Helper function to issue error about improper __VA_OPT__ use.  */
   2134      1.1  mrg static void
   2135      1.1  mrg maybe_va_opt_error (cpp_reader *pfile)
   2136      1.1  mrg {
   2137      1.1  mrg   if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, va_opt))
   2138      1.1  mrg     {
   2139      1.1  mrg       /* __VA_OPT__ should not be accepted at all, but allow it in
   2140      1.1  mrg 	 system headers.  */
   2141  1.1.1.3  mrg       if (!_cpp_in_system_header (pfile))
   2142  1.1.1.3  mrg 	{
   2143  1.1.1.3  mrg 	  if (CPP_OPTION (pfile, cplusplus))
   2144  1.1.1.3  mrg 	    cpp_error (pfile, CPP_DL_PEDWARN,
   2145  1.1.1.3  mrg 		       "__VA_OPT__ is not available until C++20");
   2146  1.1.1.3  mrg 	  else
   2147  1.1.1.3  mrg 	    cpp_error (pfile, CPP_DL_PEDWARN,
   2148  1.1.1.3  mrg 		       "__VA_OPT__ is not available until C23");
   2149      1.1  mrg 	}
   2150      1.1  mrg     }
   2151      1.1  mrg   else if (!pfile->state.va_args_ok)
   2152      1.1  mrg     {
   2153      1.1  mrg       /* __VA_OPT__ should only appear in the replacement list of a
   2154      1.1  mrg 	 variadic macro.  */
   2155      1.1  mrg       cpp_error (pfile, CPP_DL_PEDWARN,
   2156      1.1  mrg 		 "__VA_OPT__ can only appear in the expansion"
   2157      1.1  mrg 		 " of a C++20 variadic macro");
   2158      1.1  mrg     }
   2159      1.1  mrg }
   2160  1.1.1.3  mrg 
   2161  1.1.1.3  mrg /* Helper function to perform diagnostics that are needed (rarely)
   2162  1.1.1.3  mrg    when an identifier is lexed.  */
   2163  1.1.1.3  mrg static void
   2164  1.1.1.3  mrg identifier_diagnostics_on_lex (cpp_reader *pfile, cpp_hashnode *node)
   2165  1.1.1.3  mrg {
   2166  1.1.1.3  mrg   if (__builtin_expect (!(node->flags & NODE_DIAGNOSTIC)
   2167  1.1.1.3  mrg 			|| pfile->state.skipping, 1))
   2168  1.1.1.3  mrg     return;
   2169  1.1.1.3  mrg 
   2170  1.1.1.3  mrg   /* It is allowed to poison the same identifier twice.  */
   2171  1.1.1.3  mrg   if ((node->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
   2172  1.1.1.3  mrg     {
   2173  1.1.1.3  mrg       cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
   2174  1.1.1.3  mrg 		 NODE_NAME (node));
   2175  1.1.1.3  mrg       const auto data = (cpp_hashnode_extra *)
   2176  1.1.1.3  mrg 	ht_lookup (pfile->extra_hash_table, node->ident, HT_NO_INSERT);
   2177  1.1.1.3  mrg       if (data && data->poisoned_loc)
   2178  1.1.1.3  mrg 	cpp_error_at (pfile, CPP_DL_NOTE, data->poisoned_loc, "poisoned here");
   2179  1.1.1.3  mrg     }
   2180  1.1.1.3  mrg 
   2181  1.1.1.3  mrg   /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
   2182  1.1.1.3  mrg      replacement list of a variadic macro.  */
   2183  1.1.1.3  mrg   if (node == pfile->spec_nodes.n__VA_ARGS__
   2184  1.1.1.3  mrg       && !pfile->state.va_args_ok)
   2185  1.1.1.3  mrg     {
   2186  1.1.1.3  mrg       if (CPP_OPTION (pfile, cplusplus))
   2187  1.1.1.3  mrg 	cpp_error (pfile, CPP_DL_PEDWARN,
   2188  1.1.1.3  mrg 		   "__VA_ARGS__ can only appear in the expansion"
   2189  1.1.1.3  mrg 		   " of a C++11 variadic macro");
   2190  1.1.1.3  mrg       else
   2191  1.1.1.3  mrg 	cpp_error (pfile, CPP_DL_PEDWARN,
   2192  1.1.1.3  mrg 		   "__VA_ARGS__ can only appear in the expansion"
   2193  1.1.1.3  mrg 		   " of a C99 variadic macro");
   2194  1.1.1.3  mrg     }
   2195  1.1.1.3  mrg 
   2196  1.1.1.3  mrg   /* __VA_OPT__ should only appear in the replacement list of a
   2197  1.1.1.3  mrg      variadic macro.  */
   2198  1.1.1.3  mrg   if (node == pfile->spec_nodes.n__VA_OPT__)
   2199  1.1.1.3  mrg     maybe_va_opt_error (pfile);
   2200  1.1.1.3  mrg 
   2201  1.1.1.3  mrg   /* For -Wc++-compat, warn about use of C++ named operators.  */
   2202  1.1.1.3  mrg   if (node->flags & NODE_WARN_OPERATOR)
   2203  1.1.1.3  mrg     cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
   2204  1.1.1.3  mrg 		 "identifier \"%s\" is a special operator name in C++",
   2205  1.1.1.3  mrg 		 NODE_NAME (node));
   2206  1.1.1.3  mrg }
   2207      1.1  mrg 
   2208      1.1  mrg /* Helper function to get the cpp_hashnode of the identifier BASE.  */
   2209      1.1  mrg static cpp_hashnode *
   2210      1.1  mrg lex_identifier_intern (cpp_reader *pfile, const uchar *base)
   2211      1.1  mrg {
   2212      1.1  mrg   cpp_hashnode *result;
   2213      1.1  mrg   const uchar *cur;
   2214      1.1  mrg   unsigned int len;
   2215      1.1  mrg   unsigned int hash = HT_HASHSTEP (0, *base);
   2216      1.1  mrg 
   2217      1.1  mrg   cur = base + 1;
   2218      1.1  mrg   while (ISIDNUM (*cur))
   2219      1.1  mrg     {
   2220      1.1  mrg       hash = HT_HASHSTEP (hash, *cur);
   2221      1.1  mrg       cur++;
   2222      1.1  mrg     }
   2223      1.1  mrg   len = cur - base;
   2224      1.1  mrg   hash = HT_HASHFINISH (hash, len);
   2225      1.1  mrg   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
   2226  1.1.1.3  mrg 					      base, len, hash, HT_ALLOC));
   2227      1.1  mrg   identifier_diagnostics_on_lex (pfile, result);
   2228      1.1  mrg   return result;
   2229      1.1  mrg }
   2230      1.1  mrg 
   2231      1.1  mrg /* Get the cpp_hashnode of an identifier specified by NAME in
   2232      1.1  mrg    the current cpp_reader object.  If none is found, NULL is returned.  */
   2233      1.1  mrg cpp_hashnode *
   2234      1.1  mrg _cpp_lex_identifier (cpp_reader *pfile, const char *name)
   2235      1.1  mrg {
   2236      1.1  mrg   cpp_hashnode *result;
   2237      1.1  mrg   result = lex_identifier_intern (pfile, (uchar *) name);
   2238      1.1  mrg   return result;
   2239      1.1  mrg }
   2240  1.1.1.3  mrg 
   2241  1.1.1.3  mrg /* Lex an identifier starting at BASE.  BUFFER->CUR is expected to point
   2242  1.1.1.3  mrg    one past the first character at BASE, which may be a (possibly multi-byte)
   2243      1.1  mrg    character if STARTS_UCN is true.  */
   2244      1.1  mrg static cpp_hashnode *
   2245      1.1  mrg lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
   2246      1.1  mrg 		struct normalize_state *nst, cpp_hashnode **spelling)
   2247      1.1  mrg {
   2248      1.1  mrg   cpp_hashnode *result;
   2249      1.1  mrg   const uchar *cur;
   2250      1.1  mrg   unsigned int len;
   2251      1.1  mrg   unsigned int hash = HT_HASHSTEP (0, *base);
   2252      1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   2253      1.1  mrg 
   2254      1.1  mrg   cur = pfile->buffer->cur;
   2255      1.1  mrg   if (! starts_ucn)
   2256      1.1  mrg     {
   2257      1.1  mrg       while (ISIDNUM (*cur))
   2258      1.1  mrg 	{
   2259      1.1  mrg 	  hash = HT_HASHSTEP (hash, *cur);
   2260      1.1  mrg 	  cur++;
   2261      1.1  mrg 	}
   2262      1.1  mrg       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
   2263      1.1  mrg     }
   2264      1.1  mrg   pfile->buffer->cur = cur;
   2265      1.1  mrg   if (starts_ucn || forms_identifier_p (pfile, false, nst))
   2266      1.1  mrg     {
   2267      1.1  mrg       /* Slower version for identifiers containing UCNs
   2268      1.1  mrg 	 or extended chars (including $).  */
   2269      1.1  mrg       do {
   2270      1.1  mrg 	while (ISIDNUM (*pfile->buffer->cur))
   2271      1.1  mrg 	  {
   2272      1.1  mrg 	    NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
   2273      1.1  mrg 	    pfile->buffer->cur++;
   2274      1.1  mrg 	  }
   2275      1.1  mrg       } while (forms_identifier_p (pfile, false, nst));
   2276      1.1  mrg       if (warn_bidi_p)
   2277      1.1  mrg 	maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
   2278      1.1  mrg       result = _cpp_interpret_identifier (pfile, base,
   2279      1.1  mrg 					  pfile->buffer->cur - base);
   2280      1.1  mrg       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
   2281      1.1  mrg     }
   2282      1.1  mrg   else
   2283      1.1  mrg     {
   2284      1.1  mrg       len = cur - base;
   2285      1.1  mrg       hash = HT_HASHFINISH (hash, len);
   2286      1.1  mrg 
   2287      1.1  mrg       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
   2288      1.1  mrg 						  base, len, hash, HT_ALLOC));
   2289      1.1  mrg       *spelling = result;
   2290      1.1  mrg     }
   2291  1.1.1.3  mrg 
   2292  1.1.1.3  mrg   return result;
   2293      1.1  mrg }
   2294  1.1.1.3  mrg 
   2295  1.1.1.3  mrg /* Struct to hold the return value of the scan_cur_identifier () helper
   2296      1.1  mrg    function below.  */
   2297  1.1.1.3  mrg 
   2298  1.1.1.3  mrg struct scan_id_result
   2299  1.1.1.3  mrg {
   2300  1.1.1.3  mrg   cpp_hashnode *node;
   2301  1.1.1.3  mrg   normalize_state nst;
   2302  1.1.1.3  mrg 
   2303  1.1.1.3  mrg   scan_id_result ()
   2304  1.1.1.3  mrg     : node (nullptr)
   2305  1.1.1.3  mrg   {
   2306  1.1.1.3  mrg     nst = INITIAL_NORMALIZE_STATE;
   2307      1.1  mrg   }
   2308  1.1.1.3  mrg 
   2309  1.1.1.3  mrg   explicit operator bool () const { return node; }
   2310  1.1.1.3  mrg };
   2311  1.1.1.3  mrg 
   2312  1.1.1.3  mrg /* Helper function to scan an entire identifier beginning at
   2313  1.1.1.3  mrg    pfile->buffer->cur, and possibly containing extended characters (UCNs
   2314  1.1.1.3  mrg    and/or UTF-8).  Returns the cpp_hashnode for the identifier on success, or
   2315  1.1.1.3  mrg    else nullptr, as well as a normalize_state so that normalization warnings
   2316  1.1.1.3  mrg    may be issued once the token lexing is complete.  */
   2317  1.1.1.3  mrg 
   2318  1.1.1.3  mrg static scan_id_result
   2319  1.1.1.3  mrg scan_cur_identifier (cpp_reader *pfile)
   2320  1.1.1.3  mrg {
   2321  1.1.1.3  mrg   const auto buffer = pfile->buffer;
   2322  1.1.1.3  mrg   const auto begin = buffer->cur;
   2323  1.1.1.3  mrg   scan_id_result result;
   2324  1.1.1.3  mrg   if (ISIDST (*buffer->cur))
   2325  1.1.1.3  mrg     {
   2326  1.1.1.3  mrg       ++buffer->cur;
   2327  1.1.1.3  mrg       cpp_hashnode *ignore;
   2328  1.1.1.3  mrg       result.node = lex_identifier (pfile, begin, false, &result.nst, &ignore);
   2329  1.1.1.3  mrg     }
   2330  1.1.1.3  mrg   else if (forms_identifier_p (pfile, true, &result.nst))
   2331  1.1.1.3  mrg     {
   2332  1.1.1.3  mrg       /* buffer->cur has been moved already by the call
   2333  1.1.1.3  mrg 	 to forms_identifier_p.  */
   2334  1.1.1.3  mrg       cpp_hashnode *ignore;
   2335  1.1.1.3  mrg       result.node = lex_identifier (pfile, begin, true, &result.nst, &ignore);
   2336      1.1  mrg     }
   2337      1.1  mrg   return result;
   2338      1.1  mrg }
   2339      1.1  mrg 
   2340      1.1  mrg /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  */
   2341      1.1  mrg static void
   2342      1.1  mrg lex_number (cpp_reader *pfile, cpp_string *number,
   2343      1.1  mrg 	    struct normalize_state *nst)
   2344      1.1  mrg {
   2345      1.1  mrg   const uchar *cur;
   2346      1.1  mrg   const uchar *base;
   2347      1.1  mrg   uchar *dest;
   2348      1.1  mrg 
   2349      1.1  mrg   base = pfile->buffer->cur - 1;
   2350      1.1  mrg   do
   2351      1.1  mrg     {
   2352      1.1  mrg       const uchar *adj_digit_sep = NULL;
   2353      1.1  mrg       cur = pfile->buffer->cur;
   2354      1.1  mrg 
   2355      1.1  mrg       /* N.B. ISIDNUM does not include $.  */
   2356      1.1  mrg       while (ISIDNUM (*cur)
   2357      1.1  mrg 	     || (*cur == '.' && !DIGIT_SEP (cur[-1]))
   2358      1.1  mrg 	     || DIGIT_SEP (*cur)
   2359      1.1  mrg 	     || (VALID_SIGN (*cur, cur[-1]) && !DIGIT_SEP (cur[-2])))
   2360      1.1  mrg 	{
   2361      1.1  mrg 	  NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
   2362      1.1  mrg 	  /* Adjacent digit separators do not form part of the pp-number syntax.
   2363      1.1  mrg 	     However, they can safely be diagnosed here as an error, since '' is
   2364      1.1  mrg 	     not a valid preprocessing token.  */
   2365      1.1  mrg 	  if (DIGIT_SEP (*cur) && DIGIT_SEP (cur[-1]) && !adj_digit_sep)
   2366      1.1  mrg 	    adj_digit_sep = cur;
   2367      1.1  mrg 	  cur++;
   2368      1.1  mrg 	}
   2369      1.1  mrg       /* A number can't end with a digit separator.  */
   2370      1.1  mrg       while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
   2371      1.1  mrg 	--cur;
   2372      1.1  mrg       if (adj_digit_sep && adj_digit_sep < cur)
   2373      1.1  mrg 	cpp_error (pfile, CPP_DL_ERROR, "adjacent digit separators");
   2374      1.1  mrg 
   2375      1.1  mrg       pfile->buffer->cur = cur;
   2376      1.1  mrg     }
   2377      1.1  mrg   while (forms_identifier_p (pfile, false, nst));
   2378      1.1  mrg 
   2379      1.1  mrg   number->len = cur - base;
   2380      1.1  mrg   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
   2381      1.1  mrg   memcpy (dest, base, number->len);
   2382      1.1  mrg   dest[number->len] = '\0';
   2383      1.1  mrg   number->text = dest;
   2384      1.1  mrg }
   2385      1.1  mrg 
   2386      1.1  mrg /* Create a token of type TYPE with a literal spelling.  */
   2387      1.1  mrg static void
   2388      1.1  mrg create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
   2389      1.1  mrg 		unsigned int len, enum cpp_ttype type)
   2390      1.1  mrg {
   2391      1.1  mrg   token->type = type;
   2392      1.1  mrg   token->val.str.len = len;
   2393      1.1  mrg   token->val.str.text = cpp_alloc_token_string (pfile, base, len);
   2394      1.1  mrg }
   2395  1.1.1.3  mrg 
   2396  1.1.1.3  mrg /* Like create_literal(), but construct it from two separate strings
   2397  1.1.1.3  mrg    which are concatenated.  LEN2 may be 0 if no second string is
   2398  1.1.1.3  mrg    required.  */
   2399  1.1.1.3  mrg static void
   2400  1.1.1.3  mrg create_literal2 (cpp_reader *pfile, cpp_token *token, const uchar *base1,
   2401  1.1.1.3  mrg 		 unsigned int len1, const uchar *base2, unsigned int len2,
   2402  1.1.1.3  mrg 		 enum cpp_ttype type)
   2403  1.1.1.3  mrg {
   2404  1.1.1.3  mrg   token->type = type;
   2405  1.1.1.3  mrg   token->val.str.len = len1 + len2;
   2406  1.1.1.3  mrg   uchar *const dest = _cpp_unaligned_alloc (pfile, len1 + len2 + 1);
   2407  1.1.1.3  mrg   memcpy (dest, base1, len1);
   2408  1.1.1.3  mrg   if (len2)
   2409  1.1.1.3  mrg     memcpy (dest+len1, base2, len2);
   2410  1.1.1.3  mrg   dest[len1 + len2] = 0;
   2411  1.1.1.3  mrg   token->val.str.text = dest;
   2412  1.1.1.3  mrg }
   2413      1.1  mrg 
   2414      1.1  mrg const uchar *
   2415      1.1  mrg cpp_alloc_token_string (cpp_reader *pfile,
   2416      1.1  mrg 			const unsigned char *ptr, unsigned len)
   2417      1.1  mrg {
   2418      1.1  mrg   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
   2419      1.1  mrg 
   2420      1.1  mrg   dest[len] = 0;
   2421      1.1  mrg   memcpy (dest, ptr, len);
   2422      1.1  mrg   return dest;
   2423      1.1  mrg }
   2424      1.1  mrg 
   2425      1.1  mrg /* A pair of raw buffer pointers.  The currently open one is [1], the
   2426      1.1  mrg    first one is [0].  Used for string literal lexing.  */
   2427      1.1  mrg struct lit_accum {
   2428      1.1  mrg   _cpp_buff *first;
   2429      1.1  mrg   _cpp_buff *last;
   2430      1.1  mrg   const uchar *rpos;
   2431      1.1  mrg   size_t accum;
   2432      1.1  mrg 
   2433      1.1  mrg   lit_accum ()
   2434      1.1  mrg     : first (NULL), last (NULL), rpos (0), accum (0)
   2435      1.1  mrg   {
   2436      1.1  mrg   }
   2437      1.1  mrg 
   2438      1.1  mrg   void append (cpp_reader *, const uchar *, size_t);
   2439      1.1  mrg 
   2440      1.1  mrg   void read_begin (cpp_reader *);
   2441      1.1  mrg   bool reading_p () const
   2442      1.1  mrg   {
   2443      1.1  mrg     return rpos != NULL;
   2444      1.1  mrg   }
   2445      1.1  mrg   char read_char ()
   2446      1.1  mrg   {
   2447      1.1  mrg     char c = *rpos++;
   2448      1.1  mrg     if (rpos == BUFF_FRONT (last))
   2449      1.1  mrg       rpos = NULL;
   2450      1.1  mrg     return c;
   2451  1.1.1.3  mrg   }
   2452  1.1.1.3  mrg 
   2453  1.1.1.3  mrg   void create_literal2 (cpp_reader *pfile, cpp_token *token,
   2454  1.1.1.3  mrg 			const uchar *base1, unsigned int len1,
   2455  1.1.1.3  mrg 			const uchar *base2, unsigned int len2,
   2456      1.1  mrg 			enum cpp_ttype type);
   2457      1.1  mrg };
   2458      1.1  mrg 
   2459      1.1  mrg /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
   2460      1.1  mrg    sequence from *FIRST_BUFF_P to LAST_BUFF_P.  */
   2461      1.1  mrg 
   2462      1.1  mrg void
   2463      1.1  mrg lit_accum::append (cpp_reader *pfile, const uchar *base, size_t len)
   2464      1.1  mrg {
   2465      1.1  mrg   if (!last)
   2466      1.1  mrg     /* Starting.  */
   2467      1.1  mrg     first = last = _cpp_get_buff (pfile, len);
   2468      1.1  mrg   else if (len > BUFF_ROOM (last))
   2469      1.1  mrg     {
   2470      1.1  mrg       /* There is insufficient room in the buffer.  Copy what we can,
   2471      1.1  mrg 	 and then either extend or create a new one.  */
   2472      1.1  mrg       size_t room = BUFF_ROOM (last);
   2473      1.1  mrg       memcpy (BUFF_FRONT (last), base, room);
   2474      1.1  mrg       BUFF_FRONT (last) += room;
   2475      1.1  mrg       base += room;
   2476      1.1  mrg       len -= room;
   2477      1.1  mrg       accum += room;
   2478      1.1  mrg 
   2479      1.1  mrg       gcc_checking_assert (!rpos);
   2480      1.1  mrg 
   2481      1.1  mrg       last = _cpp_append_extend_buff (pfile, last, len);
   2482      1.1  mrg     }
   2483      1.1  mrg 
   2484      1.1  mrg   memcpy (BUFF_FRONT (last), base, len);
   2485      1.1  mrg   BUFF_FRONT (last) += len;
   2486      1.1  mrg   accum += len;
   2487      1.1  mrg }
   2488      1.1  mrg 
   2489      1.1  mrg void
   2490      1.1  mrg lit_accum::read_begin (cpp_reader *pfile)
   2491      1.1  mrg {
   2492      1.1  mrg   /* We never accumulate more than 4 chars to read.  */
   2493      1.1  mrg   if (BUFF_ROOM (last) < 4)
   2494      1.1  mrg 
   2495      1.1  mrg     last = _cpp_append_extend_buff (pfile, last, 4);
   2496      1.1  mrg   rpos = BUFF_FRONT (last);
   2497      1.1  mrg }
   2498  1.1.1.3  mrg 
   2499  1.1.1.3  mrg /* Helper function to check if a string format macro, say from inttypes.h, is
   2500  1.1.1.3  mrg    placed touching a string literal, in which case it could be parsed as a C++11
   2501  1.1.1.3  mrg    user-defined string literal thus breaking the program.  Return TRUE if the
   2502  1.1.1.3  mrg    UDL should be ignored for now and preserved for potential macro
   2503      1.1  mrg    expansion.  */
   2504      1.1  mrg 
   2505  1.1.1.3  mrg static bool
   2506  1.1.1.3  mrg maybe_ignore_udl_macro_suffix (cpp_reader *pfile, location_t src_loc,
   2507      1.1  mrg 			       const uchar *suffix_begin, cpp_hashnode *node)
   2508      1.1  mrg {
   2509      1.1  mrg   /* User-defined literals outside of namespace std must start with a single
   2510      1.1  mrg      underscore, so assume anything of that form really is a UDL suffix.
   2511      1.1  mrg      We don't need to worry about UDLs defined inside namespace std because
   2512      1.1  mrg      their names are reserved, so cannot be used as macro names in valid
   2513  1.1.1.3  mrg      programs.  */
   2514  1.1.1.3  mrg   if ((suffix_begin[0] == '_' && suffix_begin[1] != '_')
   2515      1.1  mrg       || !cpp_macro_p (node))
   2516  1.1.1.3  mrg     return false;
   2517  1.1.1.3  mrg 
   2518  1.1.1.3  mrg   /* Maybe raise a warning here; caller should arrange not to consume
   2519  1.1.1.3  mrg      the tokens.  */
   2520  1.1.1.3  mrg   if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
   2521  1.1.1.3  mrg     cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX, src_loc, 0,
   2522  1.1.1.3  mrg 			   "invalid suffix on literal; C++11 requires a space "
   2523  1.1.1.3  mrg 			   "between literal and string macro");
   2524  1.1.1.3  mrg   return true;
   2525  1.1.1.3  mrg }
   2526  1.1.1.3  mrg 
   2527  1.1.1.3  mrg /* Like create_literal2(), but also prepend all the accumulated data from
   2528  1.1.1.3  mrg    the lit_accum struct.  */
   2529  1.1.1.3  mrg void
   2530  1.1.1.3  mrg lit_accum::create_literal2 (cpp_reader *pfile, cpp_token *token,
   2531  1.1.1.3  mrg 			    const uchar *base1, unsigned int len1,
   2532  1.1.1.3  mrg 			    const uchar *base2, unsigned int len2,
   2533  1.1.1.3  mrg 			    enum cpp_ttype type)
   2534  1.1.1.3  mrg {
   2535  1.1.1.3  mrg   const unsigned int tot_len = accum + len1 + len2;
   2536  1.1.1.3  mrg   uchar *dest = _cpp_unaligned_alloc (pfile, tot_len + 1);
   2537  1.1.1.3  mrg   token->type = type;
   2538  1.1.1.3  mrg   token->val.str.len = tot_len;
   2539  1.1.1.3  mrg   token->val.str.text = dest;
   2540  1.1.1.3  mrg   for (_cpp_buff *buf = first; buf; buf = buf->next)
   2541  1.1.1.3  mrg     {
   2542  1.1.1.3  mrg       size_t len = BUFF_FRONT (buf) - buf->base;
   2543  1.1.1.3  mrg       memcpy (dest, buf->base, len);
   2544  1.1.1.3  mrg       dest += len;
   2545  1.1.1.3  mrg     }
   2546  1.1.1.3  mrg   memcpy (dest, base1, len1);
   2547  1.1.1.3  mrg   dest += len1;
   2548  1.1.1.3  mrg   if (len2)
   2549  1.1.1.3  mrg     memcpy (dest, base2, len2);
   2550  1.1.1.3  mrg   dest += len2;
   2551      1.1  mrg   *dest = '\0';
   2552      1.1  mrg }
   2553      1.1  mrg 
   2554      1.1  mrg /* Lexes a raw string.  The stored string contains the spelling,
   2555      1.1  mrg    including double quotes, delimiter string, '(' and ')', any leading
   2556      1.1  mrg    'L', 'u', 'U' or 'u8' and 'R' modifier.  The created token contains
   2557      1.1  mrg    the type of the literal, or CPP_OTHER if it was not properly
   2558      1.1  mrg    terminated.
   2559      1.1  mrg 
   2560      1.1  mrg    BASE is the start of the token.  Updates pfile->buffer->cur to just
   2561      1.1  mrg    after the lexed string.
   2562      1.1  mrg 
   2563      1.1  mrg    The spelling is NUL-terminated, but it is not guaranteed that this
   2564      1.1  mrg    is the first NUL since embedded NULs are preserved.  */
   2565      1.1  mrg 
                      /* Implementation notes: the delimiter scanned before '(' is kept in
                         PREFIX with a closing '"' appended, so that once a ')' is seen the
                         following characters can be compared against PREFIX one at a time.
                         Text that cannot be taken straight from the current line buffer
                         (restored backslash-newlines and trigraphs, and lines that have
                         already been completed) is collected in a lit_accum and joined with
                         the tail of the current line when the token is created.  */
   2566      1.1  mrg static void
   2567      1.1  mrg lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
   2568      1.1  mrg {
   2569      1.1  mrg   const uchar *pos = base;
   2570  1.1.1.3  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   2571  1.1.1.3  mrg   const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
   2572      1.1  mrg   const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
   2573      1.1  mrg 
   2574      1.1  mrg   /* 'tis a pity this information isn't passed down from the lexer's
   2575      1.1  mrg      initial categorization of the token.  */
   2576      1.1  mrg   enum cpp_ttype type = CPP_STRING;
   2577      1.1  mrg 
   2578      1.1  mrg   if (*pos == 'L')
   2579      1.1  mrg     {
   2580      1.1  mrg       type = CPP_WSTRING;
   2581      1.1  mrg       pos++;
   2582      1.1  mrg     }
   2583      1.1  mrg   else if (*pos == 'U')
   2584      1.1  mrg     {
   2585      1.1  mrg       type = CPP_STRING32;
   2586      1.1  mrg       pos++;
   2587      1.1  mrg     }
   2588      1.1  mrg   else if (*pos == 'u')
   2589      1.1  mrg     {
   2590      1.1  mrg       if (pos[1] == '8')
   2591      1.1  mrg 	{
   2592      1.1  mrg 	  type = CPP_UTF8STRING;
   2593      1.1  mrg 	  pos++;
   2594      1.1  mrg 	}
   2595      1.1  mrg       else
   2596      1.1  mrg 	type = CPP_STRING16;
   2597      1.1  mrg       pos++;
   2598      1.1  mrg     }
   2599      1.1  mrg 
   2600      1.1  mrg   gcc_checking_assert (pos[0] == 'R' && pos[1] == '"');
   2601      1.1  mrg   pos += 2;
   2602      1.1  mrg 
   2603      1.1  mrg   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
   2604      1.1  mrg 
   2605      1.1  mrg   /* Skip notes before the ".  */
   2606      1.1  mrg   while (note->pos < pos)
   2607      1.1  mrg     ++note;
   2608      1.1  mrg 
   2609      1.1  mrg   lit_accum accum;
   2610      1.1  mrg 
                        /* Room for the longest permitted delimiter (16 characters) plus the
                           '"' that is stored after it once the opening '(' is seen.  */
   2611      1.1  mrg   uchar prefix[17];
   2612      1.1  mrg   unsigned prefix_len = 0;
                        /* PHASE_PREFIX: still reading the delimiter before '('.
                           PHASE_NONE: inside the body of the string.
                           PHASE_SUFFIX and above: a ')' has been seen and the value is the
                           index into PREFIX of the next character that must match.  */
   2613      1.1  mrg   enum Phase
   2614      1.1  mrg   {
   2615      1.1  mrg    PHASE_PREFIX = -2,
   2616      1.1  mrg    PHASE_NONE = -1,
   2617      1.1  mrg    PHASE_SUFFIX = 0
   2618      1.1  mrg   } phase = PHASE_PREFIX;
   2619      1.1  mrg 
   2620      1.1  mrg   for (;;)
   2621      1.1  mrg     {
   2622      1.1  mrg       gcc_checking_assert (note->pos >= pos);
   2623      1.1  mrg 
   2624      1.1  mrg       /* Undo any escaped newlines and trigraphs.  */
   2625      1.1  mrg       if (!accum.reading_p () && note->pos == pos)
   2626      1.1  mrg 	switch (note->type)
   2627      1.1  mrg 	  {
   2628      1.1  mrg 	  case '\\':
   2629      1.1  mrg 	  case ' ':
   2630      1.1  mrg 	    /* Restore backslash followed by newline.  */
   2631      1.1  mrg 	    accum.append (pfile, base, pos - base);
   2632      1.1  mrg 	    base = pos;
   2633      1.1  mrg 	    accum.read_begin (pfile);
   2634      1.1  mrg 	    accum.append (pfile, UC"\\", 1);
   2635      1.1  mrg 
                      	  /* Also entered by goto from the trigraph case below, after
                      	     a ??/ that is followed by a newline.  */
   2636      1.1  mrg 	  after_backslash:
   2637      1.1  mrg 	    if (note->type == ' ')
   2638      1.1  mrg 	      /* GNU backslash whitespace newline extension.  FIXME
   2639      1.1  mrg 		 could be any sequence of non-vertical space.  When we
   2640      1.1  mrg 		 can properly restore any such sequence, we should
   2641      1.1  mrg 		 mark this note as handled so _cpp_process_line_notes
   2642      1.1  mrg 		 doesn't warn.  */
   2643      1.1  mrg 	      accum.append (pfile, UC" ", 1);
   2644      1.1  mrg 
   2645      1.1  mrg 	    accum.append (pfile, UC"\n", 1);
   2646      1.1  mrg 	    note++;
   2647      1.1  mrg 	    break;
   2648      1.1  mrg 
   2649      1.1  mrg 	  case '\n':
   2650      1.1  mrg 	    /* This can happen for ??/<NEWLINE> when trigraphs are not
   2651      1.1  mrg 	       being interpreted.  */
   2652      1.1  mrg 	    gcc_checking_assert (!CPP_OPTION (pfile, trigraphs));
   2653      1.1  mrg 	    note->type = 0;
   2654      1.1  mrg 	    note++;
   2655      1.1  mrg 	    break;
   2656      1.1  mrg 
   2657      1.1  mrg 	  default:
   2658      1.1  mrg 	    gcc_checking_assert (_cpp_trigraph_map[note->type]);
   2659      1.1  mrg 
   2660      1.1  mrg 	    /* Don't warn about this trigraph in
   2661      1.1  mrg 	       _cpp_process_line_notes, since trigraphs show up as
   2662      1.1  mrg 	       trigraphs in raw strings.  */
   2663      1.1  mrg 	    uchar type = note->type;
   2664      1.1  mrg 	    note->type = 0;
   2665      1.1  mrg 
   2666      1.1  mrg 	    if (CPP_OPTION (pfile, trigraphs))
   2667      1.1  mrg 	      {
   2668      1.1  mrg 		accum.append (pfile, base, pos - base);
   2669      1.1  mrg 		base = pos;
   2670      1.1  mrg 		accum.read_begin (pfile);
   2671      1.1  mrg 		accum.append (pfile, UC"??", 2);
   2672      1.1  mrg 		accum.append (pfile, &type, 1);
   2673      1.1  mrg 
   2674      1.1  mrg 		/* ??/ followed by newline gets two line notes, one for
   2675      1.1  mrg 		   the trigraph and one for the backslash/newline.  */
   2676      1.1  mrg 		if (type == '/' && note[1].pos == pos)
   2677      1.1  mrg 		  {
   2678      1.1  mrg 		    note++;
   2679      1.1  mrg 		    gcc_assert (note->type == '\\' || note->type == ' ');
   2680      1.1  mrg 		    goto after_backslash;
   2681      1.1  mrg 		  }
   2682      1.1  mrg 		/* Skip the replacement character.  */
   2683      1.1  mrg 		base = ++pos;
   2684      1.1  mrg 	      }
   2685      1.1  mrg 
   2686      1.1  mrg 	    note++;
   2687      1.1  mrg 	    break;
   2688      1.1  mrg 	  }
   2689      1.1  mrg 
   2690      1.1  mrg       /* Now get a char to process.  Either from an expanded note, or
   2691      1.1  mrg 	 from the line buffer.  */
   2692      1.1  mrg       bool read_note = accum.reading_p ();
   2693      1.1  mrg       char c = read_note ? accum.read_char () : *pos++;
   2694      1.1  mrg 
   2695      1.1  mrg       if (phase == PHASE_PREFIX)
   2696      1.1  mrg 	{
   2697      1.1  mrg 	  if (c == '(')
   2698      1.1  mrg 	    {
   2699      1.1  mrg 	      /* Done.  */
   2700      1.1  mrg 	      phase = PHASE_NONE;
   2701      1.1  mrg 	      prefix[prefix_len++] = '"';
   2702      1.1  mrg 	    }
   2703      1.1  mrg 	  else if (prefix_len < 16
   2704      1.1  mrg 		   /* Prefix chars are any of the basic character set,
   2705      1.1  mrg 		      [lex.charset] except for '
   2706      1.1  mrg 		      ()\\\t\v\f\n'. Optimized for a contiguous
   2707      1.1  mrg 		      alphabet.  */
   2708      1.1  mrg 		   /* Unlike a switch, this collapses down to one or
   2709      1.1  mrg 		      two shift and bitmask operations on an ASCII
   2710      1.1  mrg 		      system, with an outlier or two.   */
   2711      1.1  mrg 		   && (('Z' - 'A' == 25
   2712      1.1  mrg 			? ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
   2713      1.1  mrg 			: ISIDST (c))
   2714      1.1  mrg 		       || (c >= '0' && c <= '9')
   2715      1.1  mrg 		       || c == '_' || c == '{' || c == '}'
   2716      1.1  mrg 		       || c == '[' || c == ']' || c == '#'
   2717      1.1  mrg 		       || c == '<' || c == '>' || c == '%'
   2718      1.1  mrg 		       || c == ':' || c == ';' || c == '.' || c == '?'
   2719      1.1  mrg 		       || c == '*' || c == '+' || c == '-' || c == '/'
   2720      1.1  mrg 		       || c == '^' || c == '&' || c == '|' || c == '~'
   2721      1.1  mrg 		       || c == '!' || c == '=' || c == ','
   2722      1.1  mrg 		       || c == '"' || c == '\''))
   2723      1.1  mrg 	    prefix[prefix_len++] = c;
   2724      1.1  mrg 	  else
   2725      1.1  mrg 	    {
   2726      1.1  mrg 	      /* Something is wrong.  */
   2727      1.1  mrg 	      int col = CPP_BUF_COLUMN (pfile->buffer, pos) + read_note;
   2728      1.1  mrg 	      if (prefix_len == 16)
   2729      1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
   2730      1.1  mrg 				     col, "raw string delimiter longer "
   2731      1.1  mrg 				     "than 16 characters");
   2732      1.1  mrg 	      else if (c == '\n')
   2733      1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
   2734      1.1  mrg 				     col, "invalid new-line in raw "
   2735      1.1  mrg 				     "string delimiter");
   2736      1.1  mrg 	      else
   2737      1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
   2738      1.1  mrg 				     col, "invalid character '%c' in "
   2739      1.1  mrg 				     "raw string delimiter", c);
   2740      1.1  mrg 	      type = CPP_OTHER;
   2741      1.1  mrg 	      phase = PHASE_NONE;
   2742      1.1  mrg 	      /* Continue until we get a close quote, that's probably
   2743      1.1  mrg 		 the best failure mode.  */
   2744      1.1  mrg 	      prefix_len = 0;
   2745      1.1  mrg 	    }
                      	  /* A newline still has to go through the end-of-line
                      	     handling further down; any other character has been
                      	     dealt with completely.  */
   2746      1.1  mrg 	  if (c != '\n')
   2747      1.1  mrg 	    continue;
   2748      1.1  mrg 	}
   2749      1.1  mrg 
                            /* A ')' has been seen: C must equal the next expected character
                      	 of PREFIX.  Matching the last one (the '"') completes the
                      	 literal.  On a mismatch C is examined again below, so that a
                      	 ')' starts a new match.  */
   2750      1.1  mrg       if (phase != PHASE_NONE)
   2751      1.1  mrg 	{
   2752      1.1  mrg 	  if (prefix[phase] != c)
   2753      1.1  mrg 	    phase = PHASE_NONE;
   2754      1.1  mrg 	  else if (unsigned (phase + 1) == prefix_len)
   2755      1.1  mrg 	    break;
   2756      1.1  mrg 	  else
   2757      1.1  mrg 	    {
   2758      1.1  mrg 	      phase = Phase (phase + 1);
   2759      1.1  mrg 	      continue;
   2760      1.1  mrg 	    }
   2761      1.1  mrg 	}
   2762      1.1  mrg 
   2763      1.1  mrg       if (!prefix_len && c == '"')
   2764      1.1  mrg 	/* Failure mode lexing.  */
   2765      1.1  mrg 	goto out;
   2766      1.1  mrg       else if (prefix_len && c == ')')
   2767      1.1  mrg 	phase = PHASE_SUFFIX;
   2768      1.1  mrg       else if (!read_note && c == '\n')
   2769      1.1  mrg 	{
                      	  /* Step back so that POS is on the newline itself.  */
   2770      1.1  mrg 	  pos--;
   2771  1.1.1.3  mrg 	  pfile->buffer->cur = pos;
   2772  1.1.1.3  mrg 	  if ((pfile->state.in_directive || pfile->state.parsing_args
   2773  1.1.1.3  mrg 	       || pfile->state.in_deferred_pragma)
   2774      1.1  mrg 	      && pfile->buffer->next_line >= pfile->buffer->rlimit)
   2775      1.1  mrg 	    {
   2776      1.1  mrg 	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
   2777      1.1  mrg 				   "unterminated raw string");
   2778      1.1  mrg 	      type = CPP_OTHER;
   2779      1.1  mrg 	      goto out;
   2780      1.1  mrg 	    }
   2781      1.1  mrg 
                      	  /* Save the rest of this line, newline included (hence the
                      	     + 1), before moving on to the next line.  */
   2782      1.1  mrg 	  accum.append (pfile, base, pos - base + 1);
   2783      1.1  mrg 	  _cpp_process_line_notes (pfile, false);
   2784      1.1  mrg 
   2785      1.1  mrg 	  if (pfile->buffer->next_line < pfile->buffer->rlimit)
   2786      1.1  mrg 	    CPP_INCREMENT_LINE (pfile, 0);
   2787      1.1  mrg 	  pfile->buffer->need_line = true;
   2788  1.1.1.3  mrg 
   2789      1.1  mrg 	  if (!get_fresh_line_impl<true> (pfile))
   2790      1.1  mrg 	    {
   2791      1.1  mrg 	      /* We ran out of file and failed to get a line.  */
   2792      1.1  mrg 	      location_t src_loc = token->src_loc;
   2793      1.1  mrg 	      token->type = CPP_EOF;
   2794      1.1  mrg 	      /* Tell the compiler the line number of the EOF token.  */
   2795      1.1  mrg 	      token->src_loc = pfile->line_table->highest_line;
   2796      1.1  mrg 	      token->flags = BOL;
   2797      1.1  mrg 	      if (accum.first)
   2798      1.1  mrg 		_cpp_release_buff (pfile, accum.first);
   2799      1.1  mrg 	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
   2800  1.1.1.3  mrg 				   "unterminated raw string");
   2801  1.1.1.3  mrg 
   2802  1.1.1.3  mrg 	      /* Now pop the buffer that get_fresh_line_impl() did not.  Popping
   2803  1.1.1.3  mrg 		 is not safe if processing a directive, however this cannot
   2804  1.1.1.3  mrg 		 happen as we already checked above that a line would be
   2805  1.1.1.3  mrg 		 available, and get_fresh_line_impl() can't fail in this
   2806  1.1.1.3  mrg 		 case.  */
   2807      1.1  mrg 	      gcc_assert (!pfile->state.in_directive);
   2808  1.1.1.3  mrg 	      _cpp_pop_buffer (pfile);
   2809      1.1  mrg 
   2810      1.1  mrg 	      return;
   2811      1.1  mrg 	    }
   2812      1.1  mrg 
                      	  /* Carry on from the buffer's current position, with the
                      	     line notes that go with it.  */
   2813      1.1  mrg 	  pos = base = pfile->buffer->cur;
   2814      1.1  mrg 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
   2815  1.1.1.3  mrg 	}
   2816  1.1.1.3  mrg       else if (__builtin_expect ((unsigned char) c >= utf8_continuation, 0)
   2817  1.1.1.3  mrg 	       && warn_bidi_or_invalid_utf8_p)
   2818  1.1.1.3  mrg 	pos = _cpp_handle_multibyte_utf8 (pfile, c, pos, warn_bidi_p,
   2819      1.1  mrg 					  warn_invalid_utf8_p);
   2820      1.1  mrg     }
   2821      1.1  mrg 
   2822      1.1  mrg   if (warn_bidi_p)
   2823      1.1  mrg     maybe_warn_bidi_on_close (pfile, pos);
   2824      1.1  mrg 
   2825      1.1  mrg   if (CPP_OPTION (pfile, user_literals))
   2826  1.1.1.3  mrg     {
   2827  1.1.1.3  mrg       const uchar *const suffix_begin = pos;
   2828      1.1  mrg       pfile->buffer->cur = pos;
   2829  1.1.1.3  mrg 
   2830  1.1.1.3  mrg       if (const auto sr = scan_cur_identifier (pfile))
   2831  1.1.1.3  mrg 	{
                      	  /* Either rewind to SUFFIX_BEGIN so that the identifier is
                      	     not consumed here, or fold it into the literal as a
                      	     user-defined suffix and finish.  */
   2832  1.1.1.3  mrg 	  if (maybe_ignore_udl_macro_suffix (pfile, token->src_loc,
   2833  1.1.1.3  mrg 					     suffix_begin, sr.node))
   2834  1.1.1.3  mrg 	      pfile->buffer->cur = suffix_begin;
   2835  1.1.1.3  mrg 	  else
   2836  1.1.1.3  mrg 	    {
   2837  1.1.1.3  mrg 	      type = cpp_userdef_string_add_type (type);
   2838  1.1.1.3  mrg 	      accum.create_literal2 (pfile, token, base, suffix_begin - base,
   2839  1.1.1.3  mrg 				     NODE_NAME (sr.node), NODE_LEN (sr.node),
   2840  1.1.1.3  mrg 				     type);
   2841  1.1.1.3  mrg 	      if (accum.first)
   2842  1.1.1.3  mrg 		_cpp_release_buff (pfile, accum.first);
   2843  1.1.1.3  mrg 	      warn_about_normalization (pfile, token, &sr.nst, true);
   2844  1.1.1.3  mrg 	      return;
   2845      1.1  mrg 	    }
   2846      1.1  mrg 	}
   2847      1.1  mrg     }
   2848      1.1  mrg 
   2849      1.1  mrg  out:
                        /* With nothing accumulated the spelling is simply the range
                           [BASE, POS) of the line buffer; otherwise that range is appended
                           to the accumulated text.  */
   2850      1.1  mrg   pfile->buffer->cur = pos;
   2851      1.1  mrg   if (!accum.accum)
   2852      1.1  mrg     create_literal (pfile, token, base, pos - base, type);
   2853      1.1  mrg   else
   2854  1.1.1.3  mrg     {
   2855      1.1  mrg       accum.create_literal2 (pfile, token, base, pos - base, nullptr, 0, type);
   2856      1.1  mrg       _cpp_release_buff (pfile, accum.first);
   2857      1.1  mrg     }
   2858      1.1  mrg }
   2859      1.1  mrg 
   2860      1.1  mrg /* Lexes a string, character constant, or angle-bracketed header file
   2861      1.1  mrg    name.  The stored string contains the spelling, including opening
   2862      1.1  mrg    quote and any leading 'L', 'u', 'U' or 'u8' and optional
   2863      1.1  mrg    'R' modifier.  It returns the type of the literal, or CPP_OTHER
   2864      1.1  mrg    if it was not properly terminated, or CPP_LESS for an unterminated
   2865      1.1  mrg    header name which must be relexed as normal tokens.
   2866      1.1  mrg 
   2867      1.1  mrg    The spelling is NUL-terminated, but it is not guaranteed that this
   2868      1.1  mrg    is the first NUL since embedded NULs are preserved.  */
   2869      1.1  mrg static void
   2870      1.1  mrg lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
   2871      1.1  mrg {
   2872      1.1  mrg   bool saw_NUL = false;
   2873      1.1  mrg   const uchar *cur;
   2874      1.1  mrg   cppchar_t terminator;
   2875      1.1  mrg   enum cpp_ttype type;
   2876      1.1  mrg 
   2877      1.1  mrg   cur = base;
   2878      1.1  mrg   terminator = *cur++;
   2879      1.1  mrg   if (terminator == 'L' || terminator == 'U')
   2880      1.1  mrg     terminator = *cur++;
   2881      1.1  mrg   else if (terminator == 'u')
   2882      1.1  mrg     {
   2883      1.1  mrg       terminator = *cur++;
   2884      1.1  mrg       if (terminator == '8')
   2885      1.1  mrg 	terminator = *cur++;
   2886      1.1  mrg     }
   2887      1.1  mrg   if (terminator == 'R')
   2888      1.1  mrg     {
   2889      1.1  mrg       lex_raw_string (pfile, token, base);
   2890      1.1  mrg       return;
   2891      1.1  mrg     }
   2892      1.1  mrg   if (terminator == '"')
   2893      1.1  mrg     type = (*base == 'L' ? CPP_WSTRING :
   2894      1.1  mrg 	    *base == 'U' ? CPP_STRING32 :
   2895      1.1  mrg 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
   2896      1.1  mrg 			 : CPP_STRING);
   2897      1.1  mrg   else if (terminator == '\'')
   2898      1.1  mrg     type = (*base == 'L' ? CPP_WCHAR :
   2899      1.1  mrg 	    *base == 'U' ? CPP_CHAR32 :
   2900      1.1  mrg 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
   2901      1.1  mrg 			 : CPP_CHAR);
   2902      1.1  mrg   else
   2903      1.1  mrg     terminator = '>', type = CPP_HEADER_NAME;
   2904      1.1  mrg 
   2905  1.1.1.3  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   2906  1.1.1.3  mrg   const bool warn_invalid_utf8_p = CPP_OPTION (pfile, cpp_warn_invalid_utf8);
   2907      1.1  mrg   const bool warn_bidi_or_invalid_utf8_p = warn_bidi_p | warn_invalid_utf8_p;
   2908      1.1  mrg   for (;;)
   2909      1.1  mrg     {
   2910      1.1  mrg       cppchar_t c = *cur++;
   2911      1.1  mrg 
   2912      1.1  mrg       /* In #include-style directives, terminators are not escapable.  */
   2913      1.1  mrg       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
   2914  1.1.1.3  mrg 	{
   2915      1.1  mrg 	  if ((cur[0] == 'u' || cur[0] == 'U' || cur[0] == 'N') && warn_bidi_p)
   2916      1.1  mrg 	    {
   2917  1.1.1.3  mrg 	      location_t loc;
   2918  1.1.1.3  mrg 	      bidi::kind kind;
   2919  1.1.1.3  mrg 	      if (cur[0] == 'N')
   2920  1.1.1.3  mrg 		kind = get_bidi_named (pfile, cur + 1, &loc);
   2921  1.1.1.3  mrg 	      else
   2922      1.1  mrg 		kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U', &loc);
   2923      1.1  mrg 	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
   2924      1.1  mrg 	    }
   2925      1.1  mrg 	  cur++;
   2926      1.1  mrg 	}
   2927      1.1  mrg       else if (c == terminator)
   2928      1.1  mrg 	{
   2929      1.1  mrg 	  if (warn_bidi_p)
   2930      1.1  mrg 	    maybe_warn_bidi_on_close (pfile, cur - 1);
   2931      1.1  mrg 	  break;
   2932      1.1  mrg 	}
   2933      1.1  mrg       else if (c == '\n')
   2934      1.1  mrg 	{
   2935      1.1  mrg 	  cur--;
   2936      1.1  mrg 	  /* Unmatched quotes always yield undefined behavior, but
   2937      1.1  mrg 	     greedy lexing means that what appears to be an unterminated
   2938      1.1  mrg 	     header name may actually be a legitimate sequence of tokens.  */
   2939      1.1  mrg 	  if (terminator == '>')
   2940      1.1  mrg 	    {
   2941      1.1  mrg 	      token->type = CPP_LESS;
   2942      1.1  mrg 	      return;
   2943      1.1  mrg 	    }
   2944      1.1  mrg 	  type = CPP_OTHER;
   2945      1.1  mrg 	  break;
   2946      1.1  mrg 	}
   2947      1.1  mrg       else if (c == '\0')
   2948  1.1.1.3  mrg 	saw_NUL = true;
   2949  1.1.1.3  mrg       else if (__builtin_expect (c >= utf8_continuation, 0)
   2950  1.1.1.3  mrg 	       && warn_bidi_or_invalid_utf8_p)
   2951  1.1.1.3  mrg 	cur = _cpp_handle_multibyte_utf8 (pfile, c, cur, warn_bidi_p,
   2952      1.1  mrg 					  warn_invalid_utf8_p);
   2953      1.1  mrg     }
   2954      1.1  mrg 
   2955      1.1  mrg   if (saw_NUL && !pfile->state.skipping)
   2956      1.1  mrg     cpp_error (pfile, CPP_DL_WARNING,
   2957      1.1  mrg 	       "null character(s) preserved in literal");
   2958      1.1  mrg 
   2959      1.1  mrg   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
   2960      1.1  mrg     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
   2961      1.1  mrg 	       (int) terminator);
   2962  1.1.1.3  mrg 
   2963  1.1.1.3  mrg   pfile->buffer->cur = cur;
   2964  1.1.1.3  mrg   const uchar *const suffix_begin = cur;
   2965      1.1  mrg 
   2966      1.1  mrg   if (CPP_OPTION (pfile, user_literals))
   2967  1.1.1.3  mrg     {
   2968  1.1.1.3  mrg       if (const auto sr = scan_cur_identifier (pfile))
   2969  1.1.1.3  mrg 	{
   2970  1.1.1.3  mrg 	  if (maybe_ignore_udl_macro_suffix (pfile, token->src_loc,
   2971  1.1.1.3  mrg 					     suffix_begin, sr.node))
   2972  1.1.1.3  mrg 	    pfile->buffer->cur = suffix_begin;
   2973  1.1.1.3  mrg 	  else
   2974  1.1.1.3  mrg 	    {
   2975  1.1.1.3  mrg 	      /* Grab user defined literal suffix.  */
   2976  1.1.1.3  mrg 	      type = cpp_userdef_char_add_type (type);
   2977  1.1.1.3  mrg 	      type = cpp_userdef_string_add_type (type);
   2978  1.1.1.3  mrg 	      create_literal2 (pfile, token, base, suffix_begin - base,
   2979  1.1.1.3  mrg 			       NODE_NAME (sr.node), NODE_LEN (sr.node), type);
   2980  1.1.1.3  mrg 	      warn_about_normalization (pfile, token, &sr.nst, true);
   2981  1.1.1.3  mrg 	      return;
   2982      1.1  mrg 	    }
   2983      1.1  mrg 	}
   2984      1.1  mrg     }
   2985      1.1  mrg   else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
   2986  1.1.1.3  mrg 	   && !pfile->state.skipping)
   2987  1.1.1.3  mrg     {
   2988  1.1.1.3  mrg       const auto sr = scan_cur_identifier (pfile);
   2989  1.1.1.3  mrg       /* Maybe raise a warning, but do not consume the tokens.  */
   2990  1.1.1.3  mrg       pfile->buffer->cur = suffix_begin;
   2991  1.1.1.3  mrg       if (sr && cpp_macro_p (sr.node))
   2992  1.1.1.3  mrg 	cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
   2993  1.1.1.3  mrg 			       token->src_loc, 0, "C++11 requires a space "
   2994  1.1.1.3  mrg 			       "between string literal and macro");
   2995      1.1  mrg     }
   2996      1.1  mrg 
   2997      1.1  mrg   create_literal (pfile, token, base, cur - base, type);
   2998      1.1  mrg }
   2999      1.1  mrg 
   3000      1.1  mrg /* Return the comment table. The client may not make any assumption
   3001      1.1  mrg    about the ordering of the table.  */
   3002      1.1  mrg cpp_comment_table *
   3003      1.1  mrg cpp_get_comments (cpp_reader *pfile)
   3004      1.1  mrg {
   3005      1.1  mrg   return &pfile->comments;
   3006      1.1  mrg }
   3007      1.1  mrg 
   3008      1.1  mrg /* Append a comment to the end of the comment table. */
   3009      1.1  mrg static void
   3010      1.1  mrg store_comment (cpp_reader *pfile, cpp_token *token)
   3011      1.1  mrg {
   3012      1.1  mrg   int len;
   3013      1.1  mrg 
   3014      1.1  mrg   if (pfile->comments.allocated == 0)
   3015      1.1  mrg     {
   3016      1.1  mrg       pfile->comments.allocated = 256;
   3017      1.1  mrg       pfile->comments.entries = (cpp_comment *) xmalloc
   3018      1.1  mrg 	(pfile->comments.allocated * sizeof (cpp_comment));
   3019      1.1  mrg     }
   3020      1.1  mrg 
   3021      1.1  mrg   if (pfile->comments.count == pfile->comments.allocated)
   3022      1.1  mrg     {
   3023      1.1  mrg       pfile->comments.allocated *= 2;
   3024      1.1  mrg       pfile->comments.entries = (cpp_comment *) xrealloc
   3025      1.1  mrg 	(pfile->comments.entries,
   3026      1.1  mrg 	 pfile->comments.allocated * sizeof (cpp_comment));
   3027      1.1  mrg     }
   3028      1.1  mrg 
   3029      1.1  mrg   len = token->val.str.len;
   3030      1.1  mrg 
   3031      1.1  mrg   /* Copy comment. Note, token may not be NULL terminated. */
   3032      1.1  mrg   pfile->comments.entries[pfile->comments.count].comment =
   3033      1.1  mrg     (char *) xmalloc (sizeof (char) * (len + 1));
   3034      1.1  mrg   memcpy (pfile->comments.entries[pfile->comments.count].comment,
   3035      1.1  mrg 	  token->val.str.text, len);
   3036      1.1  mrg   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
   3037      1.1  mrg 
   3038      1.1  mrg   /* Set source location. */
   3039      1.1  mrg   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
   3040      1.1  mrg 
   3041      1.1  mrg   /* Increment the count of entries in the comment table. */
   3042      1.1  mrg   pfile->comments.count++;
   3043      1.1  mrg }
   3044      1.1  mrg 
   3045      1.1  mrg /* The stored comment includes the comment start and any terminator.  */
   3046      1.1  mrg static void
   3047      1.1  mrg save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
   3048      1.1  mrg 	      cppchar_t type)
   3049      1.1  mrg {
   3050      1.1  mrg   unsigned char *buffer;
   3051      1.1  mrg   unsigned int len, clen, i;
   3052      1.1  mrg 
   3053      1.1  mrg   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
   3054      1.1  mrg 
   3055      1.1  mrg   /* C++ comments probably (not definitely) have moved past a new
   3056      1.1  mrg      line, which we don't want to save in the comment.  */
   3057      1.1  mrg   if (is_vspace (pfile->buffer->cur[-1]))
   3058      1.1  mrg     len--;
   3059      1.1  mrg 
   3060      1.1  mrg   /* If we are currently in a directive or in argument parsing, then
   3061      1.1  mrg      we need to store all C++ comments as C comments internally, and
   3062      1.1  mrg      so we need to allocate a little extra space in that case.
   3063      1.1  mrg 
   3064      1.1  mrg      Note that the only time we encounter a directive here is
   3065      1.1  mrg      when we are saving comments in a "#define".  */
   3066      1.1  mrg   clen = ((pfile->state.in_directive || pfile->state.parsing_args)
   3067      1.1  mrg 	  && type == '/') ? len + 2 : len;
   3068      1.1  mrg 
   3069      1.1  mrg   buffer = _cpp_unaligned_alloc (pfile, clen);
   3070      1.1  mrg 
   3071      1.1  mrg   token->type = CPP_COMMENT;
   3072      1.1  mrg   token->val.str.len = clen;
   3073      1.1  mrg   token->val.str.text = buffer;
   3074      1.1  mrg 
   3075      1.1  mrg   buffer[0] = '/';
   3076      1.1  mrg   memcpy (buffer + 1, from, len - 1);
   3077      1.1  mrg 
   3078      1.1  mrg   /* Finish conversion to a C comment, if necessary.  */
   3079      1.1  mrg   if ((pfile->state.in_directive || pfile->state.parsing_args) && type == '/')
   3080      1.1  mrg     {
   3081      1.1  mrg       buffer[1] = '*';
   3082      1.1  mrg       buffer[clen - 2] = '*';
   3083      1.1  mrg       buffer[clen - 1] = '/';
   3084      1.1  mrg       /* As there can be in a C++ comments illegal sequences for C comments
   3085      1.1  mrg          we need to filter them out.  */
   3086      1.1  mrg       for (i = 2; i < (clen - 2); i++)
   3087      1.1  mrg         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
   3088      1.1  mrg           buffer[i] = '|';
   3089      1.1  mrg     }
   3090      1.1  mrg 
   3091      1.1  mrg   /* Finally store this comment for use by clients of libcpp. */
   3092      1.1  mrg   store_comment (pfile, token);
   3093      1.1  mrg }
   3094      1.1  mrg 
   3095      1.1  mrg /* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
   3096      1.1  mrg    comment.
                         Scanning starts one character past COMMENT_START and every length
                         check is made against pfile->buffer->cur.  *COMMENT_START == '*'
                         selects the block-comment terminator check at the end of the
                         function; any other value is treated as a line comment.  Which
                         spellings are accepted depends on the value of the
                         cpp_warn_implicit_fallthrough option, as described case by case
                         below.  */
   3097      1.1  mrg 
   3098      1.1  mrg static bool
   3099      1.1  mrg fallthrough_comment_p (cpp_reader *pfile, const unsigned char *comment_start)
   3100      1.1  mrg {
   3101      1.1  mrg   const unsigned char *from = comment_start + 1;
   3102      1.1  mrg 
   3103      1.1  mrg   switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
   3104      1.1  mrg     {
   3105      1.1  mrg       /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
   3106      1.1  mrg 	 don't recognize any comments.  The latter only checks attributes,
   3107      1.1  mrg 	 the former doesn't warn.  */
   3108      1.1  mrg     case 0:
   3109      1.1  mrg     default:
   3110      1.1  mrg       return false;
   3111      1.1  mrg       /* -Wimplicit-fallthrough=1 considers any comment, no matter what
   3112      1.1  mrg 	 content it has.  */
   3113      1.1  mrg     case 1:
   3114      1.1  mrg       return true;
   3115      1.1  mrg     case 2:
   3116      1.1  mrg       /* -Wimplicit-fallthrough=2 looks for (case insensitive)
   3117      1.1  mrg 	 .*falls?[ \t-]*thr(u|ough).* regex.  */
   3118      1.1  mrg       for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - 1;
   3119      1.1  mrg 	   from++)
   3120      1.1  mrg 	{
   3121      1.1  mrg 	  /* Is there anything like strpbrk with upper boundary, or
   3122      1.1  mrg 	     memchr looking for 2 characters rather than just one?  */
   3123      1.1  mrg 	  if (from[0] != 'f' && from[0] != 'F')
   3124      1.1  mrg 	    continue;
   3125      1.1  mrg 	  if (from[1] != 'a' && from[1] != 'A')
   3126      1.1  mrg 	    continue;
   3127      1.1  mrg 	  if (from[2] != 'l' && from[2] != 'L')
   3128      1.1  mrg 	    continue;
   3129      1.1  mrg 	  if (from[3] != 'l' && from[3] != 'L')
   3130      1.1  mrg 	    continue;
                      	  /* FROM is advanced in place from here on, so a failed match
                      	     below resumes the outer scan after the text already
                      	     consumed rather than at the character following the 'f'.  */
   3131      1.1  mrg 	  from += sizeof "fall" - 1;
   3132      1.1  mrg 	  if (from[0] == 's' || from[0] == 'S')
   3133      1.1  mrg 	    from++;
   3134      1.1  mrg 	  while (*from == ' ' || *from == '\t' || *from == '-')
   3135      1.1  mrg 	    from++;
   3136      1.1  mrg 	  if (from[0] != 't' && from[0] != 'T')
   3137      1.1  mrg 	    continue;
   3138      1.1  mrg 	  if (from[1] != 'h' && from[1] != 'H')
   3139      1.1  mrg 	    continue;
   3140      1.1  mrg 	  if (from[2] != 'r' && from[2] != 'R')
   3141      1.1  mrg 	    continue;
   3142      1.1  mrg 	  if (from[3] == 'u' || from[3] == 'U')
   3143      1.1  mrg 	    return true;
   3144      1.1  mrg 	  if (from[3] != 'o' && from[3] != 'O')
   3145      1.1  mrg 	    continue;
   3146      1.1  mrg 	  if (from[4] != 'u' && from[4] != 'U')
   3147      1.1  mrg 	    continue;
   3148      1.1  mrg 	  if (from[5] != 'g' && from[5] != 'G')
   3149      1.1  mrg 	    continue;
   3150      1.1  mrg 	  if (from[6] != 'h' && from[6] != 'H')
   3151      1.1  mrg 	    continue;
   3152      1.1  mrg 	  return true;
   3153      1.1  mrg 	}
   3154      1.1  mrg       return false;
                          /* Levels 3 and 4 require the whole comment to match one of the
                             forms handled after the switch.  */
   3155      1.1  mrg     case 3:
   3156      1.1  mrg     case 4:
   3157      1.1  mrg       break;
   3158      1.1  mrg     }
   3159      1.1  mrg 
   3160      1.1  mrg   /* Whole comment contents:
   3161      1.1  mrg      -fallthrough
   3162      1.1  mrg      @fallthrough@
   3163      1.1  mrg    */
   3164      1.1  mrg   if (*from == '-' || *from == '@')
   3165      1.1  mrg     {
   3166      1.1  mrg       size_t len = sizeof "fallthrough" - 1;
   3167      1.1  mrg       if ((size_t) (pfile->buffer->cur - from - 1) < len)
   3168      1.1  mrg 	return false;
   3169      1.1  mrg       if (memcmp (from + 1, "fallthrough", len))
   3170      1.1  mrg 	return false;
   3171      1.1  mrg       if (*from == '@')
   3172      1.1  mrg 	{
   3173      1.1  mrg 	  if (from[len + 1] != '@')
   3174      1.1  mrg 	    return false;
                      	  /* Account for the closing '@' in the amount skipped below.  */
   3175      1.1  mrg 	  len++;
   3176      1.1  mrg 	}
   3177      1.1  mrg       from += 1 + len;
   3178      1.1  mrg     }
   3179      1.1  mrg   /* Whole comment contents (regex):
   3180      1.1  mrg      lint -fallthrough[ \t]*
   3181      1.1  mrg    */
   3182      1.1  mrg   else if (*from == 'l')
   3183      1.1  mrg     {
   3184      1.1  mrg       size_t len = sizeof "int -fallthrough" - 1;
   3185      1.1  mrg       if ((size_t) (pfile->buffer->cur - from - 1) < len)
   3186      1.1  mrg 	return false;
   3187      1.1  mrg       if (memcmp (from + 1, "int -fallthrough", len))
   3188      1.1  mrg 	return false;
   3189      1.1  mrg       from += 1 + len;
   3190      1.1  mrg       while (*from == ' ' || *from == '\t')
   3191      1.1  mrg 	from++;
   3192      1.1  mrg     }
   3193      1.1  mrg   /* Whole comment contents (regex):
   3194      1.1  mrg      [ \t]*FALLTHR(U|OUGH)[ \t]*
   3195      1.1  mrg    */
   3196      1.1  mrg   else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == 4)
   3197      1.1  mrg     {
   3198      1.1  mrg       while (*from == ' ' || *from == '\t')
   3199      1.1  mrg 	from++;
   3200      1.1  mrg       if ((size_t) (pfile->buffer->cur - from)  < sizeof "FALLTHRU" - 1)
   3201      1.1  mrg 	return false;
   3202      1.1  mrg       if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
   3203      1.1  mrg 	return false;
   3204      1.1  mrg       from += sizeof "FALLTHR" - 1;
   3205      1.1  mrg       if (*from == 'U')
   3206      1.1  mrg 	from++;
   3207      1.1  mrg       else if ((size_t) (pfile->buffer->cur - from)  < sizeof "OUGH" - 1)
   3208      1.1  mrg 	return false;
   3209      1.1  mrg       else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
   3210      1.1  mrg 	return false;
   3211      1.1  mrg       else
   3212      1.1  mrg 	from += sizeof "OUGH" - 1;
   3213      1.1  mrg       while (*from == ' ' || *from == '\t')
   3214      1.1  mrg 	from++;
   3215      1.1  mrg     }
   3216      1.1  mrg   /* Whole comment contents (regex):
   3217      1.1  mrg      [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
   3218      1.1  mrg      [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
   3219      1.1  mrg      [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
   3220      1.1  mrg    */
   3221      1.1  mrg   else
   3222      1.1  mrg     {
   3223      1.1  mrg       while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
   3224      1.1  mrg 	from++;
                          /* F remembers the first letter of the word being matched;
                             ALL_UPPER records that an all-capitals spelling has been seen,
                             which the remaining words must then follow.  */
   3225      1.1  mrg       unsigned char f = *from;
   3226      1.1  mrg       bool all_upper = false;
   3227      1.1  mrg       if (f == 'E' || f == 'e')
   3228      1.1  mrg 	{
   3229      1.1  mrg 	  if ((size_t) (pfile->buffer->cur - from)
   3230      1.1  mrg 	      < sizeof "else fallthru" - 1)
   3231      1.1  mrg 	    return false;
   3232      1.1  mrg 	  if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
   3233      1.1  mrg 	    all_upper = true;
   3234      1.1  mrg 	  else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
   3235      1.1  mrg 	    return false;
   3236      1.1  mrg 	  from += sizeof "else" - 1;
   3237      1.1  mrg 	  if (*from == ',')
   3238      1.1  mrg 	    from++;
   3239      1.1  mrg 	  if (*from != ' ')
   3240      1.1  mrg 	    return false;
   3241      1.1  mrg 	  from++;
   3242      1.1  mrg 	  if (all_upper && *from == 'f')
   3243      1.1  mrg 	    return false;
   3244      1.1  mrg 	  if (f == 'e' && *from == 'F')
   3245      1.1  mrg 	    return false;
   3246      1.1  mrg 	  f = *from;
   3247      1.1  mrg 	}
   3248      1.1  mrg       else if (f == 'I' || f == 'i')
   3249      1.1  mrg 	{
   3250      1.1  mrg 	  if ((size_t) (pfile->buffer->cur - from)
   3251      1.1  mrg 	      < sizeof "intentional fallthru" - 1)
   3252      1.1  mrg 	    return false;
   3253      1.1  mrg 	  if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
   3254      1.1  mrg 				  sizeof "NTENTIONAL" - 1) == 0)
   3255      1.1  mrg 	    all_upper = true;
   3256      1.1  mrg 	  else if (memcmp (from + 1, "ntentional",
   3257      1.1  mrg 			   sizeof "ntentional" - 1))
   3258      1.1  mrg 	    return false;
   3259      1.1  mrg 	  from += sizeof "intentional" - 1;
   3260      1.1  mrg 	  if (*from == ' ')
   3261      1.1  mrg 	    {
   3262      1.1  mrg 	      from++;
   3263      1.1  mrg 	      if (all_upper && *from == 'f')
   3264      1.1  mrg 		return false;
   3265      1.1  mrg 	    }
   3266      1.1  mrg 	  else if (all_upper)
   3267      1.1  mrg 	    {
   3268      1.1  mrg 	      if (memcmp (from, "LY F", sizeof "LY F" - 1))
   3269      1.1  mrg 		return false;
   3270      1.1  mrg 	      from += sizeof "LY " - 1;
   3271      1.1  mrg 	    }
   3272      1.1  mrg 	  else
   3273      1.1  mrg 	    {
   3274      1.1  mrg 	      if (memcmp (from, "ly ", sizeof "ly " - 1))
   3275      1.1  mrg 		return false;
   3276      1.1  mrg 	      from += sizeof "ly " - 1;
   3277      1.1  mrg 	    }
   3278      1.1  mrg 	  if (f == 'i' && *from == 'F')
   3279      1.1  mrg 	    return false;
   3280      1.1  mrg 	  f = *from;
   3281      1.1  mrg 	}
   3282      1.1  mrg       if (f != 'F' && f != 'f')
   3283      1.1  mrg 	return false;
   3284      1.1  mrg       if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
   3285      1.1  mrg 	return false;
   3286      1.1  mrg       if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
   3287      1.1  mrg 	all_upper = true;
   3288      1.1  mrg       else if (all_upper)
   3289      1.1  mrg 	return false;
   3290      1.1  mrg       else if (memcmp (from + 1, "all", sizeof "all" - 1))
   3291      1.1  mrg 	return false;
   3292      1.1  mrg       from += sizeof "fall" - 1;
   3293      1.1  mrg       if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
   3294      1.1  mrg 	from += 2;
   3295      1.1  mrg       else if (*from == ' ' || *from == '-')
   3296      1.1  mrg 	from++;
   3297      1.1  mrg       else if (*from != (all_upper ? 'T' : 't'))
   3298      1.1  mrg 	return false;
                          /* A capital 'T' is accepted only when the word started with a
                             capital 'F'; a lower-case 't' only when not in all-capitals
                             mode.  */
   3299      1.1  mrg       if ((f == 'f' || *from != 'T') && (all_upper || *from != 't'))
   3300      1.1  mrg 	return false;
   3301      1.1  mrg       if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
   3302      1.1  mrg 	return false;
   3303      1.1  mrg       if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
   3304      1.1  mrg 	{
   3305      1.1  mrg 	  if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
   3306      1.1  mrg 	    return false;
   3307      1.1  mrg 	  if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
   3308      1.1  mrg 		      sizeof "hrough" - 1))
   3309      1.1  mrg 	    return false;
   3310      1.1  mrg 	  from += sizeof "through" - 1;
   3311      1.1  mrg 	}
   3312      1.1  mrg       else
   3313      1.1  mrg 	from += sizeof "thru" - 1;
   3314      1.1  mrg       while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
   3315      1.1  mrg 	from++;
                          /* Optional "-..." tail: skip to the end of the line, or for a
                             block comment to the closing star-slash, whichever comes
                             first.  */
   3316      1.1  mrg       if (*from == '-')
   3317      1.1  mrg 	{
   3318      1.1  mrg 	  from++;
   3319      1.1  mrg 	  if (*comment_start == '*')
   3320      1.1  mrg 	    {
   3321      1.1  mrg 	      do
   3322      1.1  mrg 		{
   3323      1.1  mrg 		  while (*from && *from != '*'
   3324      1.1  mrg 			 && *from != '\n' && *from != '\r')
   3325      1.1  mrg 		    from++;
   3326      1.1  mrg 		  if (*from != '*' || from[1] == '/')
   3327      1.1  mrg 		    break;
   3328      1.1  mrg 		  from++;
   3329      1.1  mrg 		}
   3330      1.1  mrg 	      while (1);
   3331      1.1  mrg 	    }
   3332      1.1  mrg 	  else
   3333      1.1  mrg 	    while (*from && *from != '\n' && *from != '\r')
   3334      1.1  mrg 	      from++;
   3335      1.1  mrg 	}
   3336      1.1  mrg     }
                      /* Nothing but the comment terminator may follow the matched text.  */
   3337      1.1  mrg   /* C block comment.  */
   3338      1.1  mrg   if (*comment_start == '*')
   3339      1.1  mrg     {
   3340      1.1  mrg       if (*from != '*' || from[1] != '/')
   3341      1.1  mrg 	return false;
   3342      1.1  mrg     }
   3343      1.1  mrg   /* C++ line comment.  */
   3344      1.1  mrg   else if (*from != '\n')
   3345      1.1  mrg     return false;
   3346      1.1  mrg 
   3347      1.1  mrg   return true;
   3348      1.1  mrg }
   3349      1.1  mrg 
   3350      1.1  mrg /* Allocate COUNT tokens for RUN.  RUN->base and RUN->limit delimit the
                         new array and RUN->next is cleared; RUN->prev is not touched
                         here, so the caller has to set it.  */
   3351      1.1  mrg void
   3352      1.1  mrg _cpp_init_tokenrun (tokenrun *run, unsigned int count)
   3353      1.1  mrg {
   3354      1.1  mrg   run->base = XNEWVEC (cpp_token, count);
   3355      1.1  mrg   run->limit = run->base + count;
   3356      1.1  mrg   run->next = NULL;
   3357      1.1  mrg }
   3358      1.1  mrg 
   3359      1.1  mrg /* Returns the next tokenrun, or creates one if there is none.  A newly
                         created run is linked back to RUN through its prev field and is
                         given room for 250 tokens.  */
   3360      1.1  mrg static tokenrun *
   3361      1.1  mrg next_tokenrun (tokenrun *run)
   3362      1.1  mrg {
   3363      1.1  mrg   if (run->next == NULL)
   3364      1.1  mrg     {
   3365      1.1  mrg       run->next = XNEW (tokenrun);
   3366      1.1  mrg       run->next->prev = run;
   3367      1.1  mrg       _cpp_init_tokenrun (run->next, 250);
   3368      1.1  mrg     }
   3369      1.1  mrg 
   3370      1.1  mrg   return run->next;
   3371      1.1  mrg }
   3372      1.1  mrg 
   3373      1.1  mrg /* Return the number of not yet processed tokens in a given
   3374      1.1  mrg    context.  The count is the distance between the context's FIRST
                         and LAST pointers, taken from the token or ptoken member
                         according to CONTEXT->tokens_kind.  Aborts if tokens_kind is
                         not one of the three kinds handled below.  */
   3375      1.1  mrg int
   3376      1.1  mrg _cpp_remaining_tokens_num_in_context (cpp_context *context)
   3377      1.1  mrg {
   3378      1.1  mrg   if (context->tokens_kind == TOKENS_KIND_DIRECT)
   3379      1.1  mrg     return (LAST (context).token - FIRST (context).token);
   3380      1.1  mrg   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
   3381      1.1  mrg 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
   3382      1.1  mrg     return (LAST (context).ptoken - FIRST (context).ptoken);
   3383      1.1  mrg   else
   3384      1.1  mrg       abort ();
   3385      1.1  mrg }
   3386      1.1  mrg 
   3387      1.1  mrg /* Returns the token present at index INDEX in a given context.  If
   3388      1.1  mrg    INDEX is zero, the next token to be processed is returned.  INDEX is
                         not range-checked here; the caller must keep it below the
                         number of remaining tokens.  Aborts on an unknown
                         CONTEXT->tokens_kind.  */
   3389      1.1  mrg static const cpp_token*
   3390      1.1  mrg _cpp_token_from_context_at (cpp_context *context, int index)
   3391      1.1  mrg {
   3392      1.1  mrg   if (context->tokens_kind == TOKENS_KIND_DIRECT)
   3393      1.1  mrg     return &(FIRST (context).token[index]);
   3394      1.1  mrg   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
   3395      1.1  mrg 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
   3396      1.1  mrg     return FIRST (context).ptoken[index];
   3397      1.1  mrg  else
   3398      1.1  mrg    abort ();
   3399      1.1  mrg }
   3400      1.1  mrg 
   3401      1.1  mrg /* Look ahead in the input stream.  INDEX counts tokens starting at
                         the next one to be processed (index 0).  Pending contexts are
                         searched first; beyond them, tokens are lexed with
                         _cpp_lex_token and then backed up again so that they are not
                         consumed.  Lexing stops early at a CPP_EOF or CPP_PRAGMA token,
                         in which case that token is returned instead of the one at
                         INDEX.  */
   3402      1.1  mrg const cpp_token *
   3403      1.1  mrg cpp_peek_token (cpp_reader *pfile, int index)
   3404      1.1  mrg {
   3405      1.1  mrg   cpp_context *context = pfile->context;
   3406      1.1  mrg   const cpp_token *peektok;
   3407      1.1  mrg   int count;
   3408      1.1  mrg 
   3409      1.1  mrg   /* First, scan through any pending cpp_context objects.  */
   3410      1.1  mrg   while (context->prev)
   3411      1.1  mrg     {
   3412      1.1  mrg       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
   3413      1.1  mrg 
   3414      1.1  mrg       if (index < (int) sz)
   3415      1.1  mrg         return _cpp_token_from_context_at (context, index);
   3416      1.1  mrg       index -= (int) sz;
   3417      1.1  mrg       context = context->prev;
   3418      1.1  mrg     }
   3419      1.1  mrg 
   3420      1.1  mrg   /* We will have to read some new tokens after all (and do so
   3421      1.1  mrg      without invalidating preceding tokens).  */
   3422      1.1  mrg   count = index;
   3423      1.1  mrg   pfile->keep_tokens++;
   3424      1.1  mrg 
   3425      1.1  mrg   /* For peeked tokens temporarily disable line_change reporting,
   3426      1.1  mrg      until the tokens are parsed for real.  */
   3427      1.1  mrg   void (*line_change) (cpp_reader *, const cpp_token *, int)
   3428      1.1  mrg     = pfile->cb.line_change;
   3429      1.1  mrg   pfile->cb.line_change = NULL;
   3430      1.1  mrg 
                      /* The loop condition post-decrements INDEX once per token lexed.
                         The early exits below repeat that decrement by hand, so that in
                         every case COUNT - INDEX is the number of tokens lexed and hence
                         the number to back up.  */
   3431      1.1  mrg   do
   3432      1.1  mrg     {
   3433      1.1  mrg       peektok = _cpp_lex_token (pfile);
   3434      1.1  mrg       if (peektok->type == CPP_EOF)
   3435      1.1  mrg 	{
   3436      1.1  mrg 	  index--;
   3437      1.1  mrg 	  break;
   3438      1.1  mrg 	}
   3439      1.1  mrg       else if (peektok->type == CPP_PRAGMA)
   3440      1.1  mrg 	{
   3441      1.1  mrg 	  /* Don't peek past a pragma.  */
   3442      1.1  mrg 	  if (peektok == &pfile->directive_result)
   3443      1.1  mrg 	    /* Save the pragma in the buffer.  */
   3444      1.1  mrg 	    *pfile->cur_token++ = *peektok;
   3445      1.1  mrg 	  index--;
   3446      1.1  mrg 	  break;
   3447      1.1  mrg 	}
   3448      1.1  mrg     }
   3449      1.1  mrg   while (index--);
   3450      1.1  mrg 
   3451      1.1  mrg   _cpp_backup_tokens_direct (pfile, count - index);
   3452      1.1  mrg   pfile->keep_tokens--;
                      /* Restore the callback that was disabled above.  */
   3453      1.1  mrg   pfile->cb.line_change = line_change;
   3454      1.1  mrg 
   3455      1.1  mrg   return peektok;
   3456      1.1  mrg }
   3457      1.1  mrg 
   3458      1.1  mrg /* Allocate a single token that is invalidated at the same time as the
   3459      1.1  mrg    rest of the tokens on the line.  Has its line and col set to the
   3460      1.1  mrg    same as the last lexed token, so that diagnostics appear in the
   3461      1.1  mrg    right place.  The new token is placed at pfile->cur_token; any
                         lookahead tokens stored from that position onward are first
                         moved up by one slot, spilling into the next token run when
                         the current run has no room left.  */
   3462      1.1  mrg cpp_token *
   3463      1.1  mrg _cpp_temp_token (cpp_reader *pfile)
   3464      1.1  mrg {
   3465      1.1  mrg   cpp_token *old, *result;
                      /* SZ is the number of slots from cur_token to the end of the
                         current run; LA is the number of pending lookahead tokens.  */
   3466      1.1  mrg   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
   3467      1.1  mrg   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
   3468      1.1  mrg 
   3469      1.1  mrg   old = pfile->cur_token - 1;
   3470      1.1  mrg   /* Any pre-existing lookaheads must not be clobbered.  */
   3471      1.1  mrg   if (la)
   3472      1.1  mrg     {
                          /* The lookaheads reach (or pass) the end of this run, so
                             shifting them by one pushes a token into the next run.  */
   3473      1.1  mrg       if (sz <= la)
   3474      1.1  mrg         {
   3475      1.1  mrg           tokenrun *next = next_tokenrun (pfile->cur_run);
   3476      1.1  mrg 
                              /* Make room at the front of the next run for the token
                                 that spills over from this one.  */
   3477      1.1  mrg           if (sz < la)
   3478      1.1  mrg             memmove (next->base + 1, next->base,
   3479      1.1  mrg                      (la - sz) * sizeof (cpp_token));
   3480      1.1  mrg 
   3481      1.1  mrg           next->base[0] = pfile->cur_run->limit[-1];
   3482      1.1  mrg         }
   3483      1.1  mrg 
                          /* Shift the lookaheads that stay inside this run.  */
   3484      1.1  mrg       if (sz > 1)
   3485      1.1  mrg         memmove (pfile->cur_token + 1, pfile->cur_token,
   3486      1.1  mrg                  MIN (la, sz - 1) * sizeof (cpp_token));
   3487      1.1  mrg     }
   3488      1.1  mrg 
                      /* No slot left in this run: continue at the start of the next.  */
   3489      1.1  mrg   if (!sz && pfile->cur_token == pfile->cur_run->limit)
   3490      1.1  mrg     {
   3491      1.1  mrg       pfile->cur_run = next_tokenrun (pfile->cur_run);
   3492      1.1  mrg       pfile->cur_token = pfile->cur_run->base;
   3493      1.1  mrg     }
   3494      1.1  mrg 
   3495      1.1  mrg   result = pfile->cur_token++;
   3496      1.1  mrg   result->src_loc = old->src_loc;
   3497      1.1  mrg   return result;
   3498      1.1  mrg }
   3499      1.1  mrg 
   3500      1.1  mrg /* We're at the beginning of a logical line (so not in
   3501      1.1  mrg   directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set.  See
   3502      1.1  mrg   if we should enter deferred_pragma mode to tokenize the rest of the
   3503      1.1  mrg   line as a module control-line.
                        When the line is accepted, RESULT and (after "export") the
                        following keyword token are marked NO_EXPAND and their nodes are
                        replaced by the [1] entries of n_modules.  When it is rejected,
                        the parser state changed here is restored.  Peeked tokens are
                        backed up before returning, except that a trailing
                        CPP_PRAGMA_EOL is dropped.  */
   3504      1.1  mrg 
   3505      1.1  mrg static void
   3506      1.1  mrg cpp_maybe_module_directive (cpp_reader *pfile, cpp_token *result)
   3507      1.1  mrg {
   3508      1.1  mrg   unsigned backup = 0; /* Tokens we peeked.  */
   3509      1.1  mrg   cpp_hashnode *node = result->val.node.node;
   3510      1.1  mrg   cpp_token *peek = result;
   3511      1.1  mrg   cpp_token *keyword = peek;
   3512      1.1  mrg   cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][2] = pfile->spec_nodes.n_modules;
                      /* Nonzero only for the import forms; also used below to decide
                         whether a header-name may follow.  */
   3513      1.1  mrg   int header_count = 0;
   3514      1.1  mrg 
   3515      1.1  mrg   /* Make sure the incoming state is as we expect it.  This way we
   3516      1.1  mrg      can restore it using constants.  */
   3517      1.1  mrg   gcc_checking_assert (!pfile->state.in_deferred_pragma
   3518      1.1  mrg 		       && !pfile->state.skipping
   3519      1.1  mrg 		       && !pfile->state.parsing_args
   3520      1.1  mrg 		       && !pfile->state.angled_headers
   3521      1.1  mrg 		       && (pfile->state.save_comments
   3522      1.1  mrg 			   == !CPP_OPTION (pfile, discard_comments)));
   3523      1.1  mrg 
   3524      1.1  mrg   /* Enter directives mode sufficiently for peeking.  We don't have
   3525      1.1  mrg      to actually set in_directive.  */
   3526      1.1  mrg   pfile->state.in_deferred_pragma = true;
   3527      1.1  mrg 
   3528      1.1  mrg   /* These two fields are needed to process tokenization in deferred
   3529      1.1  mrg      pragma mode.  They are not used outside deferred pragma mode or
   3530      1.1  mrg      directives mode.  */
   3531      1.1  mrg   pfile->state.pragma_allow_expansion = true;
   3532      1.1  mrg   pfile->directive_line = result->src_loc;
   3533      1.1  mrg 
   3534      1.1  mrg   /* Saving comments is incompatible with directives mode.   */
   3535      1.1  mrg   pfile->state.save_comments = 0;
   3536      1.1  mrg 
                      /* After "export", the keyword proper is the next token.  */
   3537      1.1  mrg   if (node == n_modules[spec_nodes::M_EXPORT][0])
   3538      1.1  mrg     {
   3539      1.1  mrg       peek = _cpp_lex_direct (pfile);
   3540      1.1  mrg       keyword = peek;
   3541      1.1  mrg       backup++;
   3542      1.1  mrg       if (keyword->type != CPP_NAME)
   3543      1.1  mrg 	goto not_module;
   3544      1.1  mrg       node = keyword->val.node.node;
   3545      1.1  mrg       if (!(node->flags & NODE_MODULE))
   3546      1.1  mrg 	goto not_module;
   3547      1.1  mrg     }
   3548      1.1  mrg 
   3549      1.1  mrg   if (node == n_modules[spec_nodes::M__IMPORT][0])
   3550      1.1  mrg     /* __import  */
   3551      1.1  mrg     header_count = backup + 2 + 16;
   3552      1.1  mrg   else if (node == n_modules[spec_nodes::M_IMPORT][0])
   3553      1.1  mrg     /* import  */
   3554      1.1  mrg     header_count = backup + 2 + (CPP_OPTION (pfile, preprocessed) ? 16 : 0);
   3555      1.1  mrg   else if (node == n_modules[spec_nodes::M_MODULE][0])
   3556      1.1  mrg     ; /* module  */
   3557      1.1  mrg   else
   3558      1.1  mrg     goto not_module;
   3559      1.1  mrg 
   3560      1.1  mrg   /* We've seen [export] {module|import|__import}.  Check the next token.  */
   3561      1.1  mrg   if (header_count)
   3562      1.1  mrg     /* After '{,__}import' a header name may appear.  */
   3563      1.1  mrg     pfile->state.angled_headers = true;
   3564      1.1  mrg   peek = _cpp_lex_direct (pfile);
   3565      1.1  mrg   backup++;
   3566      1.1  mrg 
   3567      1.1  mrg   /* ... import followed by identifier, ':', '<' or
   3568      1.1  mrg      header-name preprocessing tokens, or module
   3569      1.1  mrg      followed by cpp-identifier, ':' or ';' preprocessing
   3570      1.1  mrg      tokens.  C++ keywords are not yet relevant.  */
   3571      1.1  mrg   if (peek->type == CPP_NAME
   3572      1.1  mrg       || peek->type == CPP_COLON
   3573      1.1  mrg       ||  (header_count
   3574      1.1  mrg 	   ? (peek->type == CPP_LESS
   3575      1.1  mrg 	      || (peek->type == CPP_STRING && peek->val.str.text[0] != 'R')
   3576      1.1  mrg 	      || peek->type == CPP_HEADER_NAME)
   3577      1.1  mrg 	   : peek->type == CPP_SEMICOLON))
   3578      1.1  mrg     {
   3579      1.1  mrg       pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed);
   3580      1.1  mrg       if (!pfile->state.pragma_allow_expansion)
   3581      1.1  mrg 	pfile->state.prevent_expansion++;
   3582      1.1  mrg 
   3583      1.1  mrg       if (!header_count && linemap_included_from
   3584      1.1  mrg 	  (LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table)))
   3585      1.1  mrg 	cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, 0,
   3586      1.1  mrg 			     "module control-line cannot be in included file");
   3587      1.1  mrg 
   3588      1.1  mrg       /* The first one or two tokens cannot be macro names.  */
   3589      1.1  mrg       for (int ix = backup; ix--;)
   3590      1.1  mrg 	{
   3591      1.1  mrg 	  cpp_token *tok = ix ? keyword : result;
                      	  /* This NODE deliberately shadows the function-level one.  */
   3592      1.1  mrg 	  cpp_hashnode *node = tok->val.node.node;
   3593      1.1  mrg 
   3594      1.1  mrg 	  /* Don't attempt to expand the token.  */
   3595      1.1  mrg 	  tok->flags |= NO_EXPAND;
   3596      1.1  mrg 	  if (_cpp_defined_macro_p (node)
   3597      1.1  mrg 	      && _cpp_maybe_notify_macro_use (pfile, node, tok->src_loc)
   3598      1.1  mrg 	      && !cpp_fun_like_macro_p (node))
   3599      1.1  mrg 	    cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, 0,
   3600      1.1  mrg 				 "module control-line \"%s\" cannot be"
   3601      1.1  mrg 				 " an object-like macro",
   3602      1.1  mrg 				 NODE_NAME (node));
   3603      1.1  mrg 	}
   3604      1.1  mrg 
   3605      1.1  mrg       /* Map to underbar variants.  */
   3606      1.1  mrg       keyword->val.node.node = n_modules[header_count
   3607      1.1  mrg 					 ? spec_nodes::M_IMPORT
   3608      1.1  mrg 					 : spec_nodes::M_MODULE][1];
                          /* BACKUP is 1 only when there was no leading "export".  */
   3609      1.1  mrg       if (backup != 1)
   3610      1.1  mrg 	result->val.node.node = n_modules[spec_nodes::M_EXPORT][1];
   3611      1.1  mrg 
   3612      1.1  mrg       /* Maybe tell the tokenizer we expect a header-name down the
   3613      1.1  mrg 	 road.  */
   3614      1.1  mrg       pfile->state.directive_file_token = header_count;
   3615      1.1  mrg     }
   3616      1.1  mrg   else
   3617      1.1  mrg     {
   3618      1.1  mrg     not_module:
   3619      1.1  mrg       /* Drop out of directive mode.  */
   3620      1.1  mrg       /* We asserted save_comments had this value upon entry.  */
   3621      1.1  mrg       pfile->state.save_comments
   3622      1.1  mrg 	= !CPP_OPTION (pfile, discard_comments);
   3623      1.1  mrg       pfile->state.in_deferred_pragma = false;
   3624      1.1  mrg       /* Do not let this remain on.  */
   3625      1.1  mrg       pfile->state.angled_headers = false;
   3626      1.1  mrg     }
   3627      1.1  mrg 
   3628      1.1  mrg   /* In either case we want to backup the peeked tokens.  */
   3629      1.1  mrg   if (backup)
   3630      1.1  mrg     {
   3631      1.1  mrg       /* If we saw EOL, we should drop it, because this isn't a module
   3632      1.1  mrg 	 control-line after all.  */
   3633      1.1  mrg       bool eol = peek->type == CPP_PRAGMA_EOL;
   3634      1.1  mrg       if (!eol || backup > 1)
   3635      1.1  mrg 	{
   3636      1.1  mrg 	  /* Put the peeked tokens back.  */
   3637      1.1  mrg 	  _cpp_backup_tokens_direct (pfile, backup);
   3638      1.1  mrg 	  /* But if the last one was an EOL, forget it.  */
   3639      1.1  mrg 	  if (eol)
   3640      1.1  mrg 	    pfile->lookaheads--;
   3641      1.1  mrg 	}
   3642      1.1  mrg     }
   3643      1.1  mrg }
   3644      1.1  mrg 
   3645      1.1  mrg /* Lex a token into RESULT (external interface).  Takes care of issues
   3646      1.1  mrg    like directive handling, token lookahead, multiple include
   3647      1.1  mrg    optimization and skipping.  Returns the token; while
                         pfile->state.skipping is set outside a directive, tokens are
                         discarded until CPP_EOF is reached.  */
   3648      1.1  mrg const cpp_token *
   3649      1.1  mrg _cpp_lex_token (cpp_reader *pfile)
   3650      1.1  mrg {
   3651      1.1  mrg   cpp_token *result;
   3652      1.1  mrg 
   3653      1.1  mrg   for (;;)
   3654      1.1  mrg     {
                          /* The current run is used up: continue in the next one.  */
   3655      1.1  mrg       if (pfile->cur_token == pfile->cur_run->limit)
   3656      1.1  mrg 	{
   3657      1.1  mrg 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
   3658      1.1  mrg 	  pfile->cur_token = pfile->cur_run->base;
   3659      1.1  mrg 	}
   3660      1.1  mrg       /* We assume that the current token is somewhere in the current
   3661      1.1  mrg 	 run.  */
   3662      1.1  mrg       if (pfile->cur_token < pfile->cur_run->base
   3663      1.1  mrg 	  || pfile->cur_token >= pfile->cur_run->limit)
   3664      1.1  mrg 	abort ();
   3665      1.1  mrg 
                          /* Take a token already stored in the run as a lookahead in
                             preference to lexing a new one.  */
   3666      1.1  mrg       if (pfile->lookaheads)
   3667      1.1  mrg 	{
   3668      1.1  mrg 	  pfile->lookaheads--;
   3669      1.1  mrg 	  result = pfile->cur_token++;
   3670      1.1  mrg 	}
   3671      1.1  mrg       else
   3672      1.1  mrg 	result = _cpp_lex_direct (pfile);
   3673      1.1  mrg 
   3674      1.1  mrg       if (result->flags & BOL)
   3675      1.1  mrg 	{
   3676      1.1  mrg 	  /* Is this a directive?  If _cpp_handle_directive returns
   3677      1.1  mrg 	     false, it is an assembler #.  */
   3678      1.1  mrg 	  if (result->type == CPP_HASH
   3679      1.1  mrg 	      /* 6.10.3 p 11: Directives in a list of macro arguments
   3680      1.1  mrg 		 gives undefined behavior.  This implementation
   3681      1.1  mrg 		 handles the directive as normal.  */
   3682      1.1  mrg 	      && pfile->state.parsing_args != 1)
   3683      1.1  mrg 	    {
   3684      1.1  mrg 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
   3685      1.1  mrg 		{
                      		  /* A CPP_PADDING directive result carries no token
                      		     for the caller: lex the next one.  */
   3686      1.1  mrg 		  if (pfile->directive_result.type == CPP_PADDING)
   3687      1.1  mrg 		    continue;
   3688      1.1  mrg 		  result = &pfile->directive_result;
   3689      1.1  mrg 		}
   3690      1.1  mrg 	    }
   3691      1.1  mrg 	  else if (pfile->state.in_deferred_pragma)
   3692      1.1  mrg 	    result = &pfile->directive_result;
   3693      1.1  mrg 	  else if (result->type == CPP_NAME
   3694      1.1  mrg 		   && (result->val.node.node->flags & NODE_MODULE)
   3695      1.1  mrg 		   && !pfile->state.skipping
   3696      1.1  mrg 		   /* Unlike regular directives, we do not deal with
   3697      1.1  mrg 		      tokenizing module directives as macro arguments.
   3698      1.1  mrg 		      That's not permitted.  */
   3699      1.1  mrg 		   && !pfile->state.parsing_args)
   3700      1.1  mrg 	    {
   3701      1.1  mrg 	      /* P1857.  Before macro expansion, At start of logical
   3702      1.1  mrg 		 line ... */
   3703      1.1  mrg 	      /* We don't have to consider lookaheads at this point.  */
   3704      1.1  mrg 	      gcc_checking_assert (!pfile->lookaheads);
   3705      1.1  mrg 
   3706      1.1  mrg 	      cpp_maybe_module_directive (pfile, result);
   3707      1.1  mrg 	    }
   3708      1.1  mrg 
                      	  /* Report the start of a new line to the client, if it asked
                      	     for that and we are not skipping.  */
   3709      1.1  mrg 	  if (pfile->cb.line_change && !pfile->state.skipping)
   3710      1.1  mrg 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
   3711      1.1  mrg 	}
   3712      1.1  mrg 
   3713      1.1  mrg       /* We don't skip tokens in directives.  */
   3714      1.1  mrg       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
   3715      1.1  mrg 	break;
   3716      1.1  mrg 
   3717      1.1  mrg       /* Outside a directive, invalidate controlling macros.  At file
   3718      1.1  mrg 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
   3719      1.1  mrg 	 get here and MI optimization works.  */
   3720      1.1  mrg       pfile->mi_valid = false;
   3721      1.1  mrg 
                          /* While skipping, only CPP_EOF is handed back to the caller.  */
   3722      1.1  mrg       if (!pfile->state.skipping || result->type == CPP_EOF)
   3723      1.1  mrg 	break;
   3724      1.1  mrg     }
   3725      1.1  mrg 
   3726      1.1  mrg   return result;
   3727      1.1  mrg }
   3728      1.1  mrg 
   3729  1.1.1.3  mrg /* Returns true if a fresh line has been loaded, or if the current
                         buffer does not need one (need_line is clear).  Returns false
                         when a new line cannot be obtained: inside a directive, at the
                         end of a buffer while parsing macro arguments, or at the end
                         of translation.  With LEXING_RAW_STRING true the in-directive
                         refusal is postponed until the current buffer has no more
                         lines, so that the buffer is never popped from within a
                         directive.  */
   3730  1.1.1.3  mrg template <bool lexing_raw_string>
   3731  1.1.1.3  mrg static bool
   3732      1.1  mrg get_fresh_line_impl (cpp_reader *pfile)
   3733  1.1.1.3  mrg {
   3734  1.1.1.3  mrg   /* We can't get a new line until we leave the current directive, unless we
   3735  1.1.1.3  mrg      are lexing a raw string, in which case it will be OK as long as we don't
   3736  1.1.1.3  mrg      pop the current buffer.  */
   3737      1.1  mrg   if (!lexing_raw_string && pfile->state.in_directive)
   3738      1.1  mrg     return false;
   3739      1.1  mrg 
                      /* Each iteration looks at the then-current buffer; popping a
                         finished buffer below makes the next iteration retry with the
                         one underneath it.  */
   3740      1.1  mrg   for (;;)
   3741      1.1  mrg     {
   3742      1.1  mrg       cpp_buffer *buffer = pfile->buffer;
   3743      1.1  mrg 
   3744      1.1  mrg       if (!buffer->need_line)
   3745      1.1  mrg 	return true;
   3746      1.1  mrg 
   3747      1.1  mrg       if (buffer->next_line < buffer->rlimit)
   3748      1.1  mrg 	{
   3749      1.1  mrg 	  _cpp_clean_line (pfile);
   3750      1.1  mrg 	  return true;
   3751      1.1  mrg 	}
   3752  1.1.1.3  mrg 
   3753  1.1.1.3  mrg       /* We can't change buffers until we leave the current directive.  */
   3754  1.1.1.3  mrg       if (lexing_raw_string && pfile->state.in_directive)
   3755  1.1.1.3  mrg 	return false;
   3756      1.1  mrg 
   3757      1.1  mrg       /* First, get out of parsing arguments state.  */
   3758      1.1  mrg       if (pfile->state.parsing_args)
   3759      1.1  mrg 	return false;
   3760      1.1  mrg 
   3761      1.1  mrg       /* End of buffer.  Non-empty files should end in a newline.  */
   3762      1.1  mrg       if (buffer->buf != buffer->rlimit
   3763      1.1  mrg 	  && buffer->next_line > buffer->rlimit
   3764      1.1  mrg 	  && !buffer->from_stage3)
   3765      1.1  mrg 	{
   3766      1.1  mrg 	  /* Clip to buffer size.  */
   3767      1.1  mrg 	  buffer->next_line = buffer->rlimit;
   3768      1.1  mrg 	}
   3769      1.1  mrg 
   3770      1.1  mrg       if (buffer->prev && !buffer->return_at_eof)
   3771      1.1  mrg 	_cpp_pop_buffer (pfile);
   3772      1.1  mrg       else
   3773      1.1  mrg 	{
   3774      1.1  mrg 	  /* End of translation.  Do not pop the buffer yet. Increment
   3775      1.1  mrg 	     line number so that the EOF token is on a line of its own
   3776      1.1  mrg 	     (_cpp_lex_direct doesn't increment in that case, because
   3777      1.1  mrg 	     it's hard for it to distinguish this special case). */
   3778      1.1  mrg 	  CPP_INCREMENT_LINE (pfile, 0);
   3779      1.1  mrg 	  return false;
   3780      1.1  mrg 	}
   3781      1.1  mrg     }
   3782      1.1  mrg }
   3783  1.1.1.3  mrg 
   3784  1.1.1.3  mrg bool
   3785  1.1.1.3  mrg _cpp_get_fresh_line (cpp_reader *pfile)
   3786  1.1.1.3  mrg {
   3787  1.1.1.3  mrg   return get_fresh_line_impl<false> (pfile);
   3788  1.1.1.3  mrg }
   3789  1.1.1.3  mrg 
   3790      1.1  mrg 
/* If the next unread character (*buffer->cur) equals CHAR, consume it
   and set result->type to THEN_TYPE; otherwise leave the input position
   alone and set result->type to ELSE_TYPE.

   Expands to a single statement.  It relies on variables named `buffer'
   and `result' being in scope where it is used.  Each argument is
   parenthesized in the expansion so that an expression argument (for
   example a conditional expression) cannot re-associate with the
   surrounding `==' or `='.  */
#define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
  do							\
    {							\
      result->type = (ELSE_TYPE);			\
      if (*buffer->cur == (CHAR))			\
	{						\
	  buffer->cur++;				\
	  result->type = (THEN_TYPE);			\
	}						\
    }							\
  while (0)
   3799      1.1  mrg 
   3800      1.1  mrg /* Lex a token into pfile->cur_token, which is also incremented, to
   3801      1.1  mrg    get diagnostics pointing to the correct location.
   3802      1.1  mrg 
   3803      1.1  mrg    Does not handle issues such as token lookahead, multiple-include
   3804      1.1  mrg    optimization, directives, skipping etc.  This function is only
   3805      1.1  mrg    suitable for use by _cpp_lex_token, and in special cases like
   3806      1.1  mrg    lex_expansion_token which doesn't care for any of these issues.
   3807      1.1  mrg 
   3808      1.1  mrg    When meeting a newline, returns CPP_EOF if parsing a directive,
   3809      1.1  mrg    otherwise returns to the start of the token buffer if permissible.
   3810      1.1  mrg    Returns a pointer to the lexed token.  */
   3811      1.1  mrg cpp_token *
   3812      1.1  mrg _cpp_lex_direct (cpp_reader *pfile)
   3813  1.1.1.3  mrg {
   3814      1.1  mrg   cppchar_t c = 0;
   3815      1.1  mrg   cpp_buffer *buffer;
   3816      1.1  mrg   const unsigned char *comment_start;
   3817      1.1  mrg   bool fallthrough_comment = false;
                        /* Take the next token slot for the result and step cur_token past
                           it.  */
   3818      1.1  mrg   cpp_token *result = pfile->cur_token++;
   3819      1.1  mrg 
                        /* Entered initially, and again via goto after a newline has been
                           consumed outside a deferred pragma.  */
   3820      1.1  mrg  fresh_line:
   3821      1.1  mrg   result->flags = 0;
   3822      1.1  mrg   buffer = pfile->buffer;
   3823      1.1  mrg   if (buffer->need_line)
   3824      1.1  mrg     {
   3825      1.1  mrg       if (pfile->state.in_deferred_pragma)
   3826      1.1  mrg 	{
   3827      1.1  mrg 	  /* This can happen in cases like:
   3828      1.1  mrg 	     #define loop(x) whatever
   3829      1.1  mrg 	     #pragma omp loop
   3830      1.1  mrg 	     where when trying to expand loop we need to peek
   3831      1.1  mrg 	     next token after loop, but aren't still in_deferred_pragma
   3832      1.1  mrg 	     mode but are in in_directive mode, so buffer->need_line
   3833      1.1  mrg 	     is set, a CPP_EOF is peeked.  */
   3834      1.1  mrg 	  result->type = CPP_PRAGMA_EOL;
   3835      1.1  mrg 	  pfile->state.in_deferred_pragma = false;
   3836      1.1  mrg 	  if (!pfile->state.pragma_allow_expansion)
   3837  1.1.1.3  mrg 	    pfile->state.prevent_expansion--;
   3838      1.1  mrg 	  result->src_loc = pfile->line_table->highest_line;
   3839      1.1  mrg 	  return result;
   3840      1.1  mrg 	}
   3841      1.1  mrg       if (!_cpp_get_fresh_line (pfile))
   3842      1.1  mrg 	{
   3843      1.1  mrg 	  result->type = CPP_EOF;
   3844      1.1  mrg 	  /* Not a real EOF in a directive or arg parsing -- we refuse
   3845      1.1  mrg   	     to advance to the next file now, and will once we're out
   3846      1.1  mrg   	     of those modes.  */
   3847      1.1  mrg 	  if (!pfile->state.in_directive && !pfile->state.parsing_args)
   3848      1.1  mrg 	    {
   3849      1.1  mrg 	      /* Tell the compiler the line number of the EOF token.  */
   3850      1.1  mrg 	      result->src_loc = pfile->line_table->highest_line;
   3851      1.1  mrg 	      result->flags = BOL;
   3852      1.1  mrg 	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
   3853      1.1  mrg 	      _cpp_pop_buffer (pfile);
   3854  1.1.1.3  mrg 	    }
   3855  1.1.1.3  mrg 	  else if (c == 0)
   3856      1.1  mrg 	    result->src_loc = pfile->line_table->highest_line;
   3857      1.1  mrg 	  return result;
   3858      1.1  mrg 	}
   3859      1.1  mrg       if (buffer != pfile->buffer)
   3860      1.1  mrg 	fallthrough_comment = false;
                            /* Unless tokens are being kept, restart at the first slot of the
                               base token run.  */
   3861      1.1  mrg       if (!pfile->keep_tokens)
   3862      1.1  mrg 	{
   3863      1.1  mrg 	  pfile->cur_run = &pfile->base_run;
   3864      1.1  mrg 	  result = pfile->base_run.base;
   3865      1.1  mrg 	  pfile->cur_token = result + 1;
   3866      1.1  mrg 	}
   3867      1.1  mrg       result->flags = BOL;
   3868      1.1  mrg       if (pfile->state.parsing_args == 2)
   3869      1.1  mrg 	result->flags |= PREV_WHITE;
   3870      1.1  mrg     }
   3871      1.1  mrg   buffer = pfile->buffer;
                        /* The comment-skipping path below jumps back here.  */
   3872      1.1  mrg  update_tokens_line:
   3873      1.1  mrg   result->src_loc = pfile->line_table->highest_line;
   3874      1.1  mrg 
                        /* The whitespace-skipping path below jumps back here.  */
   3875      1.1  mrg  skipped_white:
   3876      1.1  mrg   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
   3877      1.1  mrg       && !pfile->overlaid_buffer)
   3878      1.1  mrg     {
   3879      1.1  mrg       _cpp_process_line_notes (pfile, false);
   3880      1.1  mrg       result->src_loc = pfile->line_table->highest_line;
   3881      1.1  mrg     }
   3882      1.1  mrg   c = *buffer->cur++;
   3883      1.1  mrg 
   3884      1.1  mrg   if (pfile->forced_token_location)
   3885      1.1  mrg     result->src_loc = pfile->forced_token_location;
   3886      1.1  mrg   else
   3887      1.1  mrg     result->src_loc = linemap_position_for_column (pfile->line_table,
   3888      1.1  mrg 					  CPP_BUF_COLUMN (buffer, buffer->cur));
   3889      1.1  mrg 
   3890      1.1  mrg   switch (c)
   3891      1.1  mrg     {
   3892      1.1  mrg     case ' ': case '\t': case '\f': case '\v': case '\0':
   3893      1.1  mrg       result->flags |= PREV_WHITE;
   3894      1.1  mrg       skip_whitespace (pfile, c);
   3895      1.1  mrg       goto skipped_white;
   3896      1.1  mrg 
   3897      1.1  mrg     case '\n':
   3898      1.1  mrg       /* Increment the line, unless this is the last line ...  */
   3899      1.1  mrg       if (buffer->cur < buffer->rlimit
   3900      1.1  mrg 	  /* ... or this is a #include, (where _cpp_stack_file needs to
   3901      1.1  mrg 	     unwind by one line) ...  */
   3902      1.1  mrg 	  || (pfile->state.in_directive > 1
   3903      1.1  mrg 	      /* ... except traditional-cpp increments this elsewhere.  */
   3904      1.1  mrg 	      && !CPP_OPTION (pfile, traditional)))
   3905      1.1  mrg 	CPP_INCREMENT_LINE (pfile, 0);
   3906      1.1  mrg       buffer->need_line = true;
   3907      1.1  mrg       if (pfile->state.in_deferred_pragma)
   3908      1.1  mrg 	{
   3909      1.1  mrg 	  /* Produce the PRAGMA_EOL on this line.  File reading
   3910      1.1  mrg 	     ensures there is always a \n at end of the buffer, thus
   3911      1.1  mrg 	     in a deferred pragma we always see CPP_PRAGMA_EOL before
   3912      1.1  mrg 	     any CPP_EOF.  */
   3913      1.1  mrg 	  result->type = CPP_PRAGMA_EOL;
   3914      1.1  mrg 	  result->flags &= ~PREV_WHITE;
   3915      1.1  mrg 	  pfile->state.in_deferred_pragma = false;
   3916      1.1  mrg 	  if (!pfile->state.pragma_allow_expansion)
   3917      1.1  mrg 	    pfile->state.prevent_expansion--;
   3918      1.1  mrg 	  return result;
   3919      1.1  mrg 	}
   3920      1.1  mrg       goto fresh_line;
   3921      1.1  mrg 
   3922      1.1  mrg     case '0': case '1': case '2': case '3': case '4':
   3923      1.1  mrg     case '5': case '6': case '7': case '8': case '9':
   3924      1.1  mrg       {
   3925      1.1  mrg 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
   3926      1.1  mrg 	result->type = CPP_NUMBER;
   3927  1.1.1.3  mrg 	lex_number (pfile, &result->val.str, &nst);
   3928      1.1  mrg 	warn_about_normalization (pfile, result, &nst, false);
   3929      1.1  mrg 	break;
   3930      1.1  mrg       }
   3931      1.1  mrg 
   3932      1.1  mrg     case 'L':
   3933      1.1  mrg     case 'u':
   3934      1.1  mrg     case 'U':
   3935      1.1  mrg     case 'R':
   3936      1.1  mrg       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
   3937      1.1  mrg 	 wide strings or raw strings.  */
   3938      1.1  mrg       if (c == 'L' || CPP_OPTION (pfile, rliterals)
   3939      1.1  mrg 	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
   3940      1.1  mrg 	{
   3941      1.1  mrg 	  if ((*buffer->cur == '\'' && c != 'R')
   3942      1.1  mrg 	      || *buffer->cur == '"'
   3943      1.1  mrg 	      || (*buffer->cur == 'R'
   3944      1.1  mrg 		  && c != 'R'
   3945      1.1  mrg 		  && buffer->cur[1] == '"'
   3946      1.1  mrg 		  && CPP_OPTION (pfile, rliterals))
   3947      1.1  mrg 	      || (*buffer->cur == '8'
   3948      1.1  mrg 		  && c == 'u'
   3949      1.1  mrg 		  && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
   3950      1.1  mrg 				&& CPP_OPTION (pfile, utf8_char_literals)))
   3951      1.1  mrg 		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
   3952      1.1  mrg 			  && CPP_OPTION (pfile, rliterals)))))
   3953      1.1  mrg 	    {
   3954      1.1  mrg 	      lex_string (pfile, result, buffer->cur - 1);
   3955      1.1  mrg 	      break;
   3956      1.1  mrg 	    }
   3957      1.1  mrg 	}
   3958      1.1  mrg       /* Fall through.  */
   3959      1.1  mrg 
   3960      1.1  mrg     case '_':
   3961      1.1  mrg     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
   3962      1.1  mrg     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
   3963      1.1  mrg     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
   3964      1.1  mrg     case 's': case 't':           case 'v': case 'w': case 'x':
   3965      1.1  mrg     case 'y': case 'z':
   3966      1.1  mrg     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
   3967      1.1  mrg     case 'G': case 'H': case 'I': case 'J': case 'K':
   3968      1.1  mrg     case 'M': case 'N': case 'O': case 'P': case 'Q':
   3969      1.1  mrg     case 'S': case 'T':           case 'V': case 'W': case 'X':
   3970      1.1  mrg     case 'Y': case 'Z':
   3971      1.1  mrg       result->type = CPP_NAME;
   3972      1.1  mrg       {
   3973  1.1.1.3  mrg 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
   3974  1.1.1.3  mrg 	const auto node = lex_identifier (pfile, buffer->cur - 1, false, &nst,
   3975  1.1.1.3  mrg 					  &result->val.node.spelling);
   3976  1.1.1.3  mrg 	result->val.node.node = node;
   3977  1.1.1.3  mrg 	identifier_diagnostics_on_lex (pfile, node);
   3978      1.1  mrg 	warn_about_normalization (pfile, result, &nst, true);
   3979      1.1  mrg       }
   3980      1.1  mrg 
   3981      1.1  mrg       /* Convert named operators to their proper types.  */
   3982      1.1  mrg       if (result->val.node.node->flags & NODE_OPERATOR)
   3983      1.1  mrg 	{
   3984      1.1  mrg 	  result->flags |= NAMED_OP;
   3985      1.1  mrg 	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
   3986      1.1  mrg 	}
   3987      1.1  mrg 
   3988      1.1  mrg       /* Signal FALLTHROUGH comment followed by another token.  */
   3989      1.1  mrg       if (fallthrough_comment)
   3990      1.1  mrg 	result->flags |= PREV_FALLTHROUGH;
   3991      1.1  mrg       break;
   3992      1.1  mrg 
   3993      1.1  mrg     case '\'':
   3994      1.1  mrg     case '"':
   3995      1.1  mrg       lex_string (pfile, result, buffer->cur - 1);
   3996      1.1  mrg       break;
   3997      1.1  mrg 
   3998      1.1  mrg     case '/':
   3999      1.1  mrg       /* A potential block or line comment.  */
   4000      1.1  mrg       comment_start = buffer->cur;
   4001      1.1  mrg       c = *buffer->cur;
   4002      1.1  mrg 
   4003      1.1  mrg       if (c == '*')
   4004      1.1  mrg 	{
   4005      1.1  mrg 	  if (_cpp_skip_block_comment (pfile))
   4006      1.1  mrg 	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
   4007      1.1  mrg 	}
   4008      1.1  mrg       else if (c == '/' && ! CPP_OPTION (pfile, traditional))
   4009      1.1  mrg 	{
   4010      1.1  mrg 	  /* Don't warn for system headers.  */
   4011      1.1  mrg 	  if (_cpp_in_system_header (pfile))
   4012      1.1  mrg 	    ;
   4013      1.1  mrg 	  /* Warn about comments if pedantically GNUC89, and not
   4014      1.1  mrg 	     in system headers.  */
   4015      1.1  mrg 	  else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
   4016      1.1  mrg 		   && CPP_PEDANTIC (pfile)
   4017      1.1  mrg 		   && ! buffer->warned_cplusplus_comments)
   4018      1.1  mrg 	    {
   4019      1.1  mrg 	      if (cpp_error (pfile, CPP_DL_PEDWARN,
   4020      1.1  mrg 			     "C++ style comments are not allowed in ISO C90"))
   4021      1.1  mrg 		cpp_error (pfile, CPP_DL_NOTE,
   4022      1.1  mrg 			   "(this will be reported only once per input file)");
   4023      1.1  mrg 	      buffer->warned_cplusplus_comments = 1;
   4024      1.1  mrg 	    }
   4025      1.1  mrg 	  /* Or if specifically desired via -Wc90-c99-compat.  */
   4026      1.1  mrg 	  else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
   4027      1.1  mrg 		   && ! CPP_OPTION (pfile, cplusplus)
   4028      1.1  mrg 		   && ! buffer->warned_cplusplus_comments)
   4029      1.1  mrg 	    {
   4030      1.1  mrg 	      if (cpp_error (pfile, CPP_DL_WARNING,
   4031      1.1  mrg 			     "C++ style comments are incompatible with C90"))
   4032      1.1  mrg 		cpp_error (pfile, CPP_DL_NOTE,
   4033      1.1  mrg 			   "(this will be reported only once per input file)");
   4034      1.1  mrg 	      buffer->warned_cplusplus_comments = 1;
   4035      1.1  mrg 	    }
   4036      1.1  mrg 	  /* In C89/C94, C++ style comments are forbidden.  */
   4037      1.1  mrg 	  else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
   4038      1.1  mrg 		    || CPP_OPTION (pfile, lang) == CLK_STDC94))
   4039      1.1  mrg 	    {
   4040      1.1  mrg 	      /* But don't be confused about valid code such as
   4041      1.1  mrg 	         - // immediately followed by *,
   4042      1.1  mrg 		 - // in a preprocessing directive,
   4043      1.1  mrg 		 - // in an #if 0 block.  */
   4044      1.1  mrg 	      if (buffer->cur[1] == '*'
   4045      1.1  mrg 		  || pfile->state.in_directive
   4046      1.1  mrg 		  || pfile->state.skipping)
   4047      1.1  mrg 		{
   4048      1.1  mrg 		  result->type = CPP_DIV;
   4049      1.1  mrg 		  break;
   4050      1.1  mrg 		}
   4051      1.1  mrg 	      else if (! buffer->warned_cplusplus_comments)
   4052      1.1  mrg 		{
   4053      1.1  mrg 		  if (cpp_error (pfile, CPP_DL_ERROR,
   4054      1.1  mrg 				 "C++ style comments are not allowed in "
   4055      1.1  mrg 				 "ISO C90"))
   4056      1.1  mrg 		    cpp_error (pfile, CPP_DL_NOTE,
   4057      1.1  mrg 			       "(this will be reported only once per input "
   4058      1.1  mrg 			       "file)");
   4059      1.1  mrg 		  buffer->warned_cplusplus_comments = 1;
   4060      1.1  mrg 		}
   4061      1.1  mrg 	    }
   4062      1.1  mrg 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
   4063      1.1  mrg 	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
   4064      1.1  mrg 	}
   4065      1.1  mrg       else if (c == '=')
   4066      1.1  mrg 	{
   4067      1.1  mrg 	  buffer->cur++;
   4068      1.1  mrg 	  result->type = CPP_DIV_EQ;
   4069      1.1  mrg 	  break;
   4070      1.1  mrg 	}
   4071      1.1  mrg       else
   4072      1.1  mrg 	{
   4073      1.1  mrg 	  result->type = CPP_DIV;
   4074      1.1  mrg 	  break;
   4075      1.1  mrg 	}
   4076      1.1  mrg 
                            /* Only the two comment branches above reach this point; the
                               CPP_DIV and CPP_DIV_EQ paths have already left the switch.  */
   4077      1.1  mrg       if (fallthrough_comment_p (pfile, comment_start))
   4078      1.1  mrg 	fallthrough_comment = true;
   4079      1.1  mrg 
   4080      1.1  mrg       if (pfile->cb.comment)
   4081      1.1  mrg 	{
   4082      1.1  mrg 	  size_t len = pfile->buffer->cur - comment_start;
   4083      1.1  mrg 	  pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
   4084      1.1  mrg 			     len + 1);
   4085      1.1  mrg 	}
   4086      1.1  mrg 
   4087      1.1  mrg       if (!pfile->state.save_comments)
   4088      1.1  mrg 	{
   4089      1.1  mrg 	  result->flags |= PREV_WHITE;
   4090      1.1  mrg 	  goto update_tokens_line;
   4091      1.1  mrg 	}
   4092      1.1  mrg 
   4093      1.1  mrg       if (fallthrough_comment)
   4094      1.1  mrg 	result->flags |= PREV_FALLTHROUGH;
   4095      1.1  mrg 
   4096      1.1  mrg       /* Save the comment as a token in its own right.  */
   4097      1.1  mrg       save_comment (pfile, result, comment_start, c);
   4098      1.1  mrg       break;
   4099      1.1  mrg 
   4100      1.1  mrg     case '<':
   4101      1.1  mrg       if (pfile->state.angled_headers)
   4102      1.1  mrg 	{
   4103      1.1  mrg 	  lex_string (pfile, result, buffer->cur - 1);
   4104      1.1  mrg 	  if (result->type != CPP_LESS)
   4105      1.1  mrg 	    break;
   4106      1.1  mrg 	}
   4107      1.1  mrg 
   4108      1.1  mrg       result->type = CPP_LESS;
   4109      1.1  mrg       if (*buffer->cur == '=')
   4110      1.1  mrg 	{
   4111      1.1  mrg 	  buffer->cur++, result->type = CPP_LESS_EQ;
   4112      1.1  mrg 	  if (*buffer->cur == '>'
   4113      1.1  mrg 	      && CPP_OPTION (pfile, cplusplus)
   4114      1.1  mrg 	      && CPP_OPTION (pfile, lang) >= CLK_GNUCXX20)
   4115      1.1  mrg 	    buffer->cur++, result->type = CPP_SPACESHIP;
   4116      1.1  mrg 	}
   4117      1.1  mrg       else if (*buffer->cur == '<')
   4118      1.1  mrg 	{
   4119      1.1  mrg 	  buffer->cur++;
   4120      1.1  mrg 	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
   4121      1.1  mrg 	}
   4122      1.1  mrg       else if (CPP_OPTION (pfile, digraphs))
   4123      1.1  mrg 	{
   4124      1.1  mrg 	  if (*buffer->cur == ':')
   4125      1.1  mrg 	    {
   4126      1.1  mrg 	      /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
   4127      1.1  mrg 		 three characters are <:: and the subsequent character
   4128      1.1  mrg 		 is neither : nor >, the < is treated as a preprocessor
   4129      1.1  mrg 		 token by itself".  */
   4130      1.1  mrg 	      if (CPP_OPTION (pfile, cplusplus)
   4131      1.1  mrg 		  && CPP_OPTION (pfile, lang) != CLK_CXX98
   4132      1.1  mrg 		  && CPP_OPTION (pfile, lang) != CLK_GNUCXX
   4133      1.1  mrg 		  && buffer->cur[1] == ':'
   4134      1.1  mrg 		  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
   4135      1.1  mrg 		break;
   4136      1.1  mrg 
   4137      1.1  mrg 	      buffer->cur++;
   4138      1.1  mrg 	      result->flags |= DIGRAPH;
   4139      1.1  mrg 	      result->type = CPP_OPEN_SQUARE;
   4140      1.1  mrg 	    }
   4141      1.1  mrg 	  else if (*buffer->cur == '%')
   4142      1.1  mrg 	    {
   4143      1.1  mrg 	      buffer->cur++;
   4144      1.1  mrg 	      result->flags |= DIGRAPH;
   4145      1.1  mrg 	      result->type = CPP_OPEN_BRACE;
   4146      1.1  mrg 	    }
   4147      1.1  mrg 	}
   4148      1.1  mrg       break;
   4149      1.1  mrg 
   4150      1.1  mrg     case '>':
   4151      1.1  mrg       result->type = CPP_GREATER;
   4152      1.1  mrg       if (*buffer->cur == '=')
   4153      1.1  mrg 	buffer->cur++, result->type = CPP_GREATER_EQ;
   4154      1.1  mrg       else if (*buffer->cur == '>')
   4155      1.1  mrg 	{
   4156      1.1  mrg 	  buffer->cur++;
   4157      1.1  mrg 	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
   4158      1.1  mrg 	}
   4159      1.1  mrg       break;
   4160      1.1  mrg 
   4161      1.1  mrg     case '%':
   4162      1.1  mrg       result->type = CPP_MOD;
   4163      1.1  mrg       if (*buffer->cur == '=')
   4164      1.1  mrg 	buffer->cur++, result->type = CPP_MOD_EQ;
   4165      1.1  mrg       else if (CPP_OPTION (pfile, digraphs))
   4166      1.1  mrg 	{
   4167      1.1  mrg 	  if (*buffer->cur == ':')
   4168      1.1  mrg 	    {
   4169      1.1  mrg 	      buffer->cur++;
   4170      1.1  mrg 	      result->flags |= DIGRAPH;
   4171      1.1  mrg 	      result->type = CPP_HASH;
   4172      1.1  mrg 	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
   4173      1.1  mrg 		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
   4174      1.1  mrg 	    }
   4175      1.1  mrg 	  else if (*buffer->cur == '>')
   4176      1.1  mrg 	    {
   4177      1.1  mrg 	      buffer->cur++;
   4178      1.1  mrg 	      result->flags |= DIGRAPH;
   4179      1.1  mrg 	      result->type = CPP_CLOSE_BRACE;
   4180      1.1  mrg 	    }
   4181      1.1  mrg 	}
   4182      1.1  mrg       break;
   4183      1.1  mrg 
   4184      1.1  mrg     case '.':
   4185      1.1  mrg       result->type = CPP_DOT;
   4186      1.1  mrg       if (ISDIGIT (*buffer->cur))
   4187      1.1  mrg 	{
   4188      1.1  mrg 	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
   4189      1.1  mrg 	  result->type = CPP_NUMBER;
   4190  1.1.1.3  mrg 	  lex_number (pfile, &result->val.str, &nst);
   4191      1.1  mrg 	  warn_about_normalization (pfile, result, &nst, false);
   4192      1.1  mrg 	}
   4193      1.1  mrg       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
   4194      1.1  mrg 	buffer->cur += 2, result->type = CPP_ELLIPSIS;
   4195      1.1  mrg       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
   4196      1.1  mrg 	buffer->cur++, result->type = CPP_DOT_STAR;
   4197      1.1  mrg       break;
   4198      1.1  mrg 
   4199      1.1  mrg     case '+':
   4200      1.1  mrg       result->type = CPP_PLUS;
   4201      1.1  mrg       if (*buffer->cur == '+')
   4202      1.1  mrg 	buffer->cur++, result->type = CPP_PLUS_PLUS;
   4203      1.1  mrg       else if (*buffer->cur == '=')
   4204      1.1  mrg 	buffer->cur++, result->type = CPP_PLUS_EQ;
   4205      1.1  mrg       break;
   4206      1.1  mrg 
   4207      1.1  mrg     case '-':
   4208      1.1  mrg       result->type = CPP_MINUS;
   4209      1.1  mrg       if (*buffer->cur == '>')
   4210      1.1  mrg 	{
   4211      1.1  mrg 	  buffer->cur++;
   4212      1.1  mrg 	  result->type = CPP_DEREF;
   4213      1.1  mrg 	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
   4214      1.1  mrg 	    buffer->cur++, result->type = CPP_DEREF_STAR;
   4215      1.1  mrg 	}
   4216      1.1  mrg       else if (*buffer->cur == '-')
   4217      1.1  mrg 	buffer->cur++, result->type = CPP_MINUS_MINUS;
   4218      1.1  mrg       else if (*buffer->cur == '=')
   4219      1.1  mrg 	buffer->cur++, result->type = CPP_MINUS_EQ;
   4220      1.1  mrg       break;
   4221      1.1  mrg 
   4222      1.1  mrg     case '&':
   4223      1.1  mrg       result->type = CPP_AND;
   4224      1.1  mrg       if (*buffer->cur == '&')
   4225      1.1  mrg 	buffer->cur++, result->type = CPP_AND_AND;
   4226      1.1  mrg       else if (*buffer->cur == '=')
   4227      1.1  mrg 	buffer->cur++, result->type = CPP_AND_EQ;
   4228      1.1  mrg       break;
   4229      1.1  mrg 
   4230      1.1  mrg     case '|':
   4231      1.1  mrg       result->type = CPP_OR;
   4232      1.1  mrg       if (*buffer->cur == '|')
   4233      1.1  mrg 	buffer->cur++, result->type = CPP_OR_OR;
   4234      1.1  mrg       else if (*buffer->cur == '=')
   4235      1.1  mrg 	buffer->cur++, result->type = CPP_OR_EQ;
   4236      1.1  mrg       break;
   4237      1.1  mrg 
   4238      1.1  mrg     case ':':
   4239  1.1.1.2  mrg       result->type = CPP_COLON;
   4240  1.1.1.2  mrg       if (*buffer->cur == ':')
   4241  1.1.1.2  mrg 	{
   4242  1.1.1.2  mrg 	  if (CPP_OPTION (pfile, scope))
   4243  1.1.1.2  mrg 	    buffer->cur++, result->type = CPP_SCOPE;
   4244  1.1.1.2  mrg 	  else
   4245  1.1.1.2  mrg 	    result->flags |= COLON_SCOPE;
   4246      1.1  mrg 	}
   4247      1.1  mrg       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
   4248      1.1  mrg 	{
   4249      1.1  mrg 	  buffer->cur++;
   4250      1.1  mrg 	  result->flags |= DIGRAPH;
   4251      1.1  mrg 	  result->type = CPP_CLOSE_SQUARE;
   4252      1.1  mrg 	}
   4253      1.1  mrg       break;
   4254      1.1  mrg 
   4255      1.1  mrg     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
   4256      1.1  mrg     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
   4257      1.1  mrg     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
   4258      1.1  mrg     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
   4259      1.1  mrg     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
   4260      1.1  mrg 
   4261      1.1  mrg     case '?': result->type = CPP_QUERY; break;
   4262      1.1  mrg     case '~': result->type = CPP_COMPL; break;
   4263      1.1  mrg     case ',': result->type = CPP_COMMA; break;
   4264      1.1  mrg     case '(': result->type = CPP_OPEN_PAREN; break;
   4265      1.1  mrg     case ')': result->type = CPP_CLOSE_PAREN; break;
   4266      1.1  mrg     case '[': result->type = CPP_OPEN_SQUARE; break;
   4267      1.1  mrg     case ']': result->type = CPP_CLOSE_SQUARE; break;
   4268      1.1  mrg     case '{': result->type = CPP_OPEN_BRACE; break;
   4269      1.1  mrg     case '}': result->type = CPP_CLOSE_BRACE; break;
   4270      1.1  mrg     case ';': result->type = CPP_SEMICOLON; break;
   4271      1.1  mrg 
   4272      1.1  mrg       /* @ is a punctuator in Objective-C.  */
   4273      1.1  mrg     case '@': result->type = CPP_ATSIGN; break;
   4274      1.1  mrg 
   4275      1.1  mrg     default:
   4276      1.1  mrg       {
   4277  1.1.1.3  mrg 	const uchar *base = --buffer->cur;
                              /* Function-local static, so the value persists across calls.
                                 It is set below after an invalid UTF-8 lead byte has been
                                 diagnosed, and each later continuation byte decrements it
                                 instead of warning again (presumably so that one bad
                                 sequence is reported only once).  */
   4278      1.1  mrg 	static int no_warn_cnt;
   4279      1.1  mrg 
   4280      1.1  mrg 	/* Check for an extended identifier ($ or UCN or UTF-8).  */
   4281      1.1  mrg 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
   4282      1.1  mrg 	if (forms_identifier_p (pfile, true, &nst))
   4283      1.1  mrg 	  {
   4284  1.1.1.3  mrg 	    result->type = CPP_NAME;
   4285  1.1.1.3  mrg 	    const auto node = lex_identifier (pfile, base, true, &nst,
   4286  1.1.1.3  mrg 					      &result->val.node.spelling);
   4287  1.1.1.3  mrg 	    result->val.node.node = node;
   4288  1.1.1.3  mrg 	    identifier_diagnostics_on_lex (pfile, node);
   4289      1.1  mrg 	    warn_about_normalization (pfile, result, &nst, true);
   4290      1.1  mrg 	    break;
   4291      1.1  mrg 	  }
   4292      1.1  mrg 
   4293      1.1  mrg 	/* Otherwise this will form a CPP_OTHER token.  Parse valid UTF-8 as a
   4294      1.1  mrg 	   single token.  */
   4295      1.1  mrg 	buffer->cur++;
   4296      1.1  mrg 	if (c >= utf8_signifier)
   4297      1.1  mrg 	  {
   4298      1.1  mrg 	    const uchar *pstr = base;
   4299      1.1  mrg 	    cppchar_t s;
   4300  1.1.1.3  mrg 	    if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s))
   4301  1.1.1.3  mrg 	      {
   4302  1.1.1.3  mrg 		if (s > UCS_LIMIT && CPP_OPTION (pfile, cpp_warn_invalid_utf8))
   4303  1.1.1.3  mrg 		  {
   4304  1.1.1.3  mrg 		    buffer->cur = base;
   4305  1.1.1.3  mrg 		    _cpp_warn_invalid_utf8 (pfile);
   4306  1.1.1.3  mrg 		  }
   4307  1.1.1.3  mrg 		buffer->cur = pstr;
   4308  1.1.1.3  mrg 	      }
   4309  1.1.1.3  mrg 	    else if (CPP_OPTION (pfile, cpp_warn_invalid_utf8))
   4310  1.1.1.3  mrg 	      {
   4311  1.1.1.3  mrg 		buffer->cur = base;
   4312  1.1.1.3  mrg 		const uchar *end = _cpp_warn_invalid_utf8 (pfile);
   4313  1.1.1.3  mrg 		buffer->cur = base + 1;
   4314  1.1.1.3  mrg 		no_warn_cnt = end - buffer->cur;
   4315  1.1.1.3  mrg 	      }
   4316  1.1.1.3  mrg 	  }
   4317  1.1.1.3  mrg 	else if (c >= utf8_continuation
   4318  1.1.1.3  mrg 		 && CPP_OPTION (pfile, cpp_warn_invalid_utf8))
   4319  1.1.1.3  mrg 	  {
   4320  1.1.1.3  mrg 	    if (no_warn_cnt)
   4321  1.1.1.3  mrg 	      --no_warn_cnt;
   4322  1.1.1.3  mrg 	    else
   4323  1.1.1.3  mrg 	      {
   4324  1.1.1.3  mrg 		buffer->cur = base;
   4325  1.1.1.3  mrg 		_cpp_warn_invalid_utf8 (pfile);
   4326  1.1.1.3  mrg 		buffer->cur = base + 1;
   4327      1.1  mrg 	      }
   4328      1.1  mrg 	  }
   4329      1.1  mrg 	create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
   4330      1.1  mrg 	break;
   4331      1.1  mrg       }
   4332      1.1  mrg 
   4333      1.1  mrg     }
   4334      1.1  mrg 
   4335      1.1  mrg   /* Potentially convert the location of the token to a range.  */
   4336      1.1  mrg   if (result->src_loc >= RESERVED_LOCATION_COUNT
   4337      1.1  mrg       && result->type != CPP_EOF)
   4338      1.1  mrg     {
   4339      1.1  mrg       /* Ensure that any line notes are processed, so that we have the
   4340      1.1  mrg 	 correct physical line/column for the end-point of the token even
   4341      1.1  mrg 	 when a logical line is split via one or more backslashes.  */
   4342      1.1  mrg       if (buffer->cur >= buffer->notes[buffer->cur_note].pos
   4343      1.1  mrg 	  && !pfile->overlaid_buffer)
   4344      1.1  mrg 	_cpp_process_line_notes (pfile, false);
   4345      1.1  mrg 
   4346      1.1  mrg       source_range tok_range;
   4347      1.1  mrg       tok_range.m_start = result->src_loc;
   4348      1.1  mrg       tok_range.m_finish
   4349      1.1  mrg 	= linemap_position_for_column (pfile->line_table,
   4350      1.1  mrg 				       CPP_BUF_COLUMN (buffer, buffer->cur));
   4351  1.1.1.3  mrg 
   4352  1.1.1.3  mrg       result->src_loc
   4353  1.1.1.3  mrg 	= pfile->line_table->get_or_create_combined_loc (result->src_loc,
   4354      1.1  mrg 							 tok_range, nullptr, 0);
   4355      1.1  mrg     }
   4356      1.1  mrg 
   4357      1.1  mrg   return result;
   4358      1.1  mrg }
   4359      1.1  mrg 
   4360      1.1  mrg /* An upper bound on the number of bytes needed to spell TOKEN.
   4361      1.1  mrg    Does not include preceding whitespace.  */
   4362      1.1  mrg unsigned int
   4363      1.1  mrg cpp_token_len (const cpp_token *token)
   4364      1.1  mrg {
   4365      1.1  mrg   unsigned int len;
   4366      1.1  mrg 
   4367      1.1  mrg   switch (TOKEN_SPELL (token))
   4368      1.1  mrg     {
   4369      1.1  mrg     default:		len = 6;				break;
   4370      1.1  mrg     case SPELL_LITERAL:	len = token->val.str.len;		break;
   4371      1.1  mrg     case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
   4372      1.1  mrg     }
   4373      1.1  mrg 
   4374      1.1  mrg   return len;
   4375      1.1  mrg }
   4376      1.1  mrg 
   4377      1.1  mrg /* Parse UTF-8 out of NAMEP and place a \U escape in BUFFER.
   4378      1.1  mrg    Return the number of bytes read out of NAME.  (There are always
   4379      1.1  mrg    10 bytes written to BUFFER.)  */
   4380      1.1  mrg 
   4381      1.1  mrg static size_t
   4382      1.1  mrg utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
   4383      1.1  mrg {
   4384      1.1  mrg   int j;
   4385      1.1  mrg   int ucn_len = 0;
   4386      1.1  mrg   int ucn_len_c;
   4387      1.1  mrg   unsigned t;
   4388      1.1  mrg   unsigned long utf32;
   4389      1.1  mrg 
   4390      1.1  mrg   /* Compute the length of the UTF-8 sequence.  */
   4391      1.1  mrg   for (t = *name; t & 0x80; t <<= 1)
   4392      1.1  mrg     ucn_len++;
   4393      1.1  mrg 
   4394      1.1  mrg   utf32 = *name & (0x7F >> ucn_len);
   4395      1.1  mrg   for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
   4396      1.1  mrg     {
   4397      1.1  mrg       utf32 = (utf32 << 6) | (*++name & 0x3F);
   4398      1.1  mrg 
   4399      1.1  mrg       /* Ill-formed UTF-8.  */
   4400      1.1  mrg       if ((*name & ~0x3F) != 0x80)
   4401      1.1  mrg 	abort ();
   4402      1.1  mrg     }
   4403      1.1  mrg 
   4404      1.1  mrg   *buffer++ = '\\';
   4405      1.1  mrg   *buffer++ = 'U';
   4406      1.1  mrg   for (j = 7; j >= 0; j--)
   4407      1.1  mrg     *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
   4408      1.1  mrg   return ucn_len;
   4409      1.1  mrg }
   4410      1.1  mrg 
   4411      1.1  mrg /* Given a token TYPE corresponding to a digraph, return a pointer to
   4412      1.1  mrg    the spelling of the digraph.  */
   4413      1.1  mrg static const unsigned char *
   4414      1.1  mrg cpp_digraph2name (enum cpp_ttype type)
   4415      1.1  mrg {
   4416      1.1  mrg   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
   4417      1.1  mrg }
   4418      1.1  mrg 
   4419  1.1.1.3  mrg /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
   4420      1.1  mrg    The buffer must already contain enough space to hold the
   4421      1.1  mrg    token's spelling.  Returns a pointer to the character after the
   4422      1.1  mrg    last character written.  */
   4423      1.1  mrg unsigned char *
   4424      1.1  mrg _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
   4425      1.1  mrg {
   4426      1.1  mrg   size_t i;
   4427      1.1  mrg   const unsigned char *name = NODE_NAME (ident);
   4428      1.1  mrg 
   4429      1.1  mrg   for (i = 0; i < NODE_LEN (ident); i++)
   4430      1.1  mrg     if (name[i] & ~0x7F)
   4431      1.1  mrg       {
   4432      1.1  mrg 	i += utf8_to_ucn (buffer, name + i) - 1;
   4433      1.1  mrg 	buffer += 10;
   4434      1.1  mrg       }
   4435      1.1  mrg     else
   4436      1.1  mrg       *buffer++ = name[i];
   4437      1.1  mrg 
   4438      1.1  mrg   return buffer;
   4439      1.1  mrg }
   4440      1.1  mrg 
   4441  1.1.1.3  mrg /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
   4442      1.1  mrg    already contain enough space to hold the token's spelling.
   4443      1.1  mrg    Returns a pointer to the character after the last character written.
   4444      1.1  mrg    FORSTRING is true if this is to be the spelling after translation
   4445      1.1  mrg    phase 1 (with the original spelling of extended identifiers), false
   4446      1.1  mrg    if extended identifiers should always be written using UCNs (there is
   4447      1.1  mrg    no option for always writing them in the internal UTF-8 form).
   4448      1.1  mrg    FIXME: Would be nice if we didn't need the PFILE argument.  */
   4449      1.1  mrg unsigned char *
   4450      1.1  mrg cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
   4451      1.1  mrg 		 unsigned char *buffer, bool forstring)
   4452      1.1  mrg {
   4453      1.1  mrg   switch (TOKEN_SPELL (token))
   4454      1.1  mrg     {
   4455      1.1  mrg     case SPELL_OPERATOR:
   4456      1.1  mrg       {
   4457      1.1  mrg 	const unsigned char *spelling;
   4458      1.1  mrg 	unsigned char c;
   4459      1.1  mrg 
   4460      1.1  mrg 	if (token->flags & DIGRAPH)
   4461      1.1  mrg 	  spelling = cpp_digraph2name (token->type);
   4462      1.1  mrg 	else if (token->flags & NAMED_OP)
   4463      1.1  mrg 	  goto spell_ident;
   4464      1.1  mrg 	else
   4465      1.1  mrg 	  spelling = TOKEN_NAME (token);
   4466      1.1  mrg 
   4467      1.1  mrg 	while ((c = *spelling++) != '\0')
   4468      1.1  mrg 	  *buffer++ = c;
   4469      1.1  mrg       }
   4470      1.1  mrg       break;
   4471      1.1  mrg 
   4472      1.1  mrg     spell_ident:
   4473      1.1  mrg     case SPELL_IDENT:
   4474      1.1  mrg       if (forstring)
   4475      1.1  mrg 	{
   4476      1.1  mrg 	  memcpy (buffer, NODE_NAME (token->val.node.spelling),
   4477      1.1  mrg 		  NODE_LEN (token->val.node.spelling));
   4478      1.1  mrg 	  buffer += NODE_LEN (token->val.node.spelling);
   4479      1.1  mrg 	}
   4480      1.1  mrg       else
   4481      1.1  mrg 	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
   4482      1.1  mrg       break;
   4483      1.1  mrg 
   4484      1.1  mrg     case SPELL_LITERAL:
   4485      1.1  mrg       memcpy (buffer, token->val.str.text, token->val.str.len);
   4486      1.1  mrg       buffer += token->val.str.len;
   4487      1.1  mrg       break;
   4488      1.1  mrg 
   4489      1.1  mrg     case SPELL_NONE:
   4490      1.1  mrg       cpp_error (pfile, CPP_DL_ICE,
   4491      1.1  mrg 		 "unspellable token %s", TOKEN_NAME (token));
   4492      1.1  mrg       break;
   4493      1.1  mrg     }
   4494      1.1  mrg 
   4495      1.1  mrg   return buffer;
   4496      1.1  mrg }
   4497      1.1  mrg 
   4498      1.1  mrg /* Returns TOKEN spelt as a null-terminated string.  The string is
   4499      1.1  mrg    freed when the reader is destroyed.  Useful for diagnostics.  */
   4500      1.1  mrg unsigned char *
   4501      1.1  mrg cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
   4502      1.1  mrg {
   4503      1.1  mrg   unsigned int len = cpp_token_len (token) + 1;
   4504      1.1  mrg   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
   4505      1.1  mrg 
   4506      1.1  mrg   end = cpp_spell_token (pfile, token, start, false);
   4507      1.1  mrg   end[0] = '\0';
   4508      1.1  mrg 
   4509      1.1  mrg   return start;
   4510      1.1  mrg }
   4511      1.1  mrg 
   4512      1.1  mrg /* Returns a pointer to a string which spells the token defined by
   4513      1.1  mrg    TYPE and FLAGS.  Used by C front ends, which really should move to
   4514      1.1  mrg    using cpp_token_as_text.  */
   4515      1.1  mrg const char *
   4516      1.1  mrg cpp_type2name (enum cpp_ttype type, unsigned char flags)
   4517      1.1  mrg {
   4518      1.1  mrg   if (flags & DIGRAPH)
   4519      1.1  mrg     return (const char *) cpp_digraph2name (type);
   4520      1.1  mrg   else if (flags & NAMED_OP)
   4521      1.1  mrg     return cpp_named_operator2name (type);
   4522      1.1  mrg 
   4523      1.1  mrg   return (const char *) token_spellings[type].name;
   4524      1.1  mrg }
   4525      1.1  mrg 
   4526      1.1  mrg /* Writes the spelling of token to FP, without any preceding space.
   4527      1.1  mrg    Separated from cpp_spell_token for efficiency - to avoid stdio
   4528      1.1  mrg    double-buffering.  */
   4529      1.1  mrg void
   4530      1.1  mrg cpp_output_token (const cpp_token *token, FILE *fp)
   4531      1.1  mrg {
   4532      1.1  mrg   switch (TOKEN_SPELL (token))
   4533      1.1  mrg     {
   4534      1.1  mrg     case SPELL_OPERATOR:
   4535      1.1  mrg       {
   4536      1.1  mrg 	const unsigned char *spelling;
   4537      1.1  mrg 	int c;
   4538      1.1  mrg 
   4539      1.1  mrg 	if (token->flags & DIGRAPH)
   4540      1.1  mrg 	  spelling = cpp_digraph2name (token->type);
   4541      1.1  mrg 	else if (token->flags & NAMED_OP)
   4542      1.1  mrg 	  goto spell_ident;
   4543      1.1  mrg 	else
   4544      1.1  mrg 	  spelling = TOKEN_NAME (token);
   4545      1.1  mrg 
   4546      1.1  mrg 	c = *spelling;
   4547      1.1  mrg 	do
   4548      1.1  mrg 	  putc (c, fp);
   4549      1.1  mrg 	while ((c = *++spelling) != '\0');
   4550      1.1  mrg       }
   4551      1.1  mrg       break;
   4552      1.1  mrg 
   4553      1.1  mrg     spell_ident:
   4554      1.1  mrg     case SPELL_IDENT:
   4555      1.1  mrg       {
   4556      1.1  mrg 	size_t i;
   4557      1.1  mrg 	const unsigned char * name = NODE_NAME (token->val.node.node);
   4558      1.1  mrg 
   4559      1.1  mrg 	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
   4560      1.1  mrg 	  if (name[i] & ~0x7F)
   4561      1.1  mrg 	    {
   4562      1.1  mrg 	      unsigned char buffer[10];
   4563      1.1  mrg 	      i += utf8_to_ucn (buffer, name + i) - 1;
   4564      1.1  mrg 	      fwrite (buffer, 1, 10, fp);
   4565      1.1  mrg 	    }
   4566      1.1  mrg 	  else
   4567      1.1  mrg 	    fputc (NODE_NAME (token->val.node.node)[i], fp);
   4568      1.1  mrg       }
   4569      1.1  mrg       break;
   4570      1.1  mrg 
   4571      1.1  mrg     case SPELL_LITERAL:
   4572      1.1  mrg       if (token->type == CPP_HEADER_NAME)
   4573      1.1  mrg 	fputc ('"', fp);
   4574      1.1  mrg       fwrite (token->val.str.text, 1, token->val.str.len, fp);
   4575      1.1  mrg       if (token->type == CPP_HEADER_NAME)
   4576      1.1  mrg 	fputc ('"', fp);
   4577      1.1  mrg       break;
   4578      1.1  mrg 
   4579      1.1  mrg     case SPELL_NONE:
   4580      1.1  mrg       /* An error, most probably.  */
   4581      1.1  mrg       break;
   4582      1.1  mrg     }
   4583      1.1  mrg }
   4584      1.1  mrg 
   4585      1.1  mrg /* Compare two tokens.  */
   4586      1.1  mrg int
   4587      1.1  mrg _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
   4588      1.1  mrg {
   4589      1.1  mrg   if (a->type == b->type && a->flags == b->flags)
   4590      1.1  mrg     switch (TOKEN_SPELL (a))
   4591      1.1  mrg       {
   4592      1.1  mrg       default:			/* Keep compiler happy.  */
   4593      1.1  mrg       case SPELL_OPERATOR:
   4594      1.1  mrg 	/* token_no is used to track where multiple consecutive ##
   4595      1.1  mrg 	   tokens were originally located.  */
   4596      1.1  mrg 	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
   4597      1.1  mrg       case SPELL_NONE:
   4598      1.1  mrg 	return (a->type != CPP_MACRO_ARG
   4599      1.1  mrg 		|| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
   4600      1.1  mrg 		    && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
   4601      1.1  mrg       case SPELL_IDENT:
   4602      1.1  mrg 	return (a->val.node.node == b->val.node.node
   4603      1.1  mrg 		&& a->val.node.spelling == b->val.node.spelling);
   4604      1.1  mrg       case SPELL_LITERAL:
   4605      1.1  mrg 	return (a->val.str.len == b->val.str.len
   4606      1.1  mrg 		&& !memcmp (a->val.str.text, b->val.str.text,
   4607      1.1  mrg 			    a->val.str.len));
   4608      1.1  mrg       }
   4609      1.1  mrg 
   4610      1.1  mrg   return 0;
   4611      1.1  mrg }
   4612      1.1  mrg 
   4613      1.1  mrg /* Returns nonzero if a space should be inserted to avoid an
   4614      1.1  mrg    accidental token paste for output.  For simplicity, it is
   4615      1.1  mrg    conservative, and occasionally advises a space where one is not
   4616      1.1  mrg    needed, e.g. "." and ".2".  */
   4617      1.1  mrg int
   4618      1.1  mrg cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
   4619      1.1  mrg 		 const cpp_token *token2)
   4620      1.1  mrg {
   4621      1.1  mrg   enum cpp_ttype a = token1->type, b = token2->type;
   4622      1.1  mrg   cppchar_t c;
   4623      1.1  mrg 
   4624      1.1  mrg   if (token1->flags & NAMED_OP)
   4625      1.1  mrg     a = CPP_NAME;
   4626      1.1  mrg   if (token2->flags & NAMED_OP)
   4627      1.1  mrg     b = CPP_NAME;
   4628      1.1  mrg 
   4629      1.1  mrg   c = EOF;
   4630      1.1  mrg   if (token2->flags & DIGRAPH)
   4631      1.1  mrg     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
   4632      1.1  mrg   else if (token_spellings[b].category == SPELL_OPERATOR)
   4633      1.1  mrg     c = token_spellings[b].name[0];
   4634      1.1  mrg 
   4635      1.1  mrg   /* Quickly get everything that can paste with an '='.  */
   4636      1.1  mrg   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
   4637      1.1  mrg     return 1;
   4638      1.1  mrg 
   4639      1.1  mrg   switch (a)
   4640      1.1  mrg     {
   4641      1.1  mrg     case CPP_GREATER:	return c == '>';
   4642      1.1  mrg     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
   4643      1.1  mrg     case CPP_PLUS:	return c == '+';
   4644      1.1  mrg     case CPP_MINUS:	return c == '-' || c == '>';
   4645      1.1  mrg     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
   4646      1.1  mrg     case CPP_MOD:	return c == ':' || c == '>';
   4647      1.1  mrg     case CPP_AND:	return c == '&';
   4648      1.1  mrg     case CPP_OR:	return c == '|';
   4649      1.1  mrg     case CPP_COLON:	return c == ':' || c == '>';
   4650      1.1  mrg     case CPP_DEREF:	return c == '*';
   4651      1.1  mrg     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
   4652      1.1  mrg     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
   4653      1.1  mrg     case CPP_PRAGMA:
   4654      1.1  mrg     case CPP_NAME:	return ((b == CPP_NUMBER
   4655      1.1  mrg 				 && name_p (pfile, &token2->val.str))
   4656      1.1  mrg 				|| b == CPP_NAME
   4657      1.1  mrg 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
   4658      1.1  mrg     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
   4659      1.1  mrg 				|| b == CPP_CHAR
   4660      1.1  mrg 				|| c == '.' || c == '+' || c == '-');
   4661      1.1  mrg 				      /* UCNs */
   4662      1.1  mrg     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
   4663      1.1  mrg 				 && b == CPP_NAME)
   4664      1.1  mrg 				|| (CPP_OPTION (pfile, objc)
   4665      1.1  mrg 				    && token1->val.str.text[0] == '@'
   4666      1.1  mrg 				    && (b == CPP_NAME || b == CPP_STRING)));
   4667      1.1  mrg     case CPP_LESS_EQ:	return c == '>';
   4668      1.1  mrg     case CPP_STRING:
   4669      1.1  mrg     case CPP_WSTRING:
   4670      1.1  mrg     case CPP_UTF8STRING:
   4671      1.1  mrg     case CPP_STRING16:
   4672      1.1  mrg     case CPP_STRING32:	return (CPP_OPTION (pfile, user_literals)
   4673      1.1  mrg 				&& (b == CPP_NAME
   4674      1.1  mrg 				    || (TOKEN_SPELL (token2) == SPELL_LITERAL
   4675      1.1  mrg 					&& ISIDST (token2->val.str.text[0]))));
   4676      1.1  mrg 
   4677      1.1  mrg     default:		break;
   4678      1.1  mrg     }
   4679      1.1  mrg 
   4680      1.1  mrg   return 0;
   4681      1.1  mrg }
   4682      1.1  mrg 
   4683      1.1  mrg /* Output all the remaining tokens on the current line, and a newline
   4684      1.1  mrg    character, to FP.  Leading whitespace is removed.  If there are
   4685      1.1  mrg    macros, special token padding is not performed.  */
   4686      1.1  mrg void
   4687      1.1  mrg cpp_output_line (cpp_reader *pfile, FILE *fp)
   4688      1.1  mrg {
   4689      1.1  mrg   const cpp_token *token;
   4690      1.1  mrg 
   4691      1.1  mrg   token = cpp_get_token (pfile);
   4692      1.1  mrg   while (token->type != CPP_EOF)
   4693      1.1  mrg     {
   4694      1.1  mrg       cpp_output_token (token, fp);
   4695      1.1  mrg       token = cpp_get_token (pfile);
   4696      1.1  mrg       if (token->flags & PREV_WHITE)
   4697      1.1  mrg 	putc (' ', fp);
   4698      1.1  mrg     }
   4699      1.1  mrg 
   4700      1.1  mrg   putc ('\n', fp);
   4701      1.1  mrg }
   4702      1.1  mrg 
   4703      1.1  mrg /* Return a string representation of all the remaining tokens on the
   4704      1.1  mrg    current line.  The result is allocated using xmalloc and must be
   4705      1.1  mrg    freed by the caller.  */
   4706      1.1  mrg unsigned char *
   4707      1.1  mrg cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
   4708      1.1  mrg {
   4709      1.1  mrg   const cpp_token *token;
   4710      1.1  mrg   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
   4711      1.1  mrg   unsigned int alloced = 120 + out;
   4712      1.1  mrg   unsigned char *result = (unsigned char *) xmalloc (alloced);
   4713      1.1  mrg 
   4714      1.1  mrg   /* If DIR_NAME is empty, there are no initial contents.  */
   4715      1.1  mrg   if (dir_name)
   4716      1.1  mrg     {
   4717      1.1  mrg       sprintf ((char *) result, "#%s ", dir_name);
   4718      1.1  mrg       out += 2;
   4719      1.1  mrg     }
   4720      1.1  mrg 
   4721      1.1  mrg   token = cpp_get_token (pfile);
   4722      1.1  mrg   while (token->type != CPP_EOF)
   4723      1.1  mrg     {
   4724      1.1  mrg       unsigned char *last;
   4725      1.1  mrg       /* Include room for a possible space and the terminating nul.  */
   4726      1.1  mrg       unsigned int len = cpp_token_len (token) + 2;
   4727      1.1  mrg 
   4728      1.1  mrg       if (out + len > alloced)
   4729      1.1  mrg 	{
   4730      1.1  mrg 	  alloced *= 2;
   4731      1.1  mrg 	  if (out + len > alloced)
   4732      1.1  mrg 	    alloced = out + len;
   4733      1.1  mrg 	  result = (unsigned char *) xrealloc (result, alloced);
   4734      1.1  mrg 	}
   4735      1.1  mrg 
   4736      1.1  mrg       last = cpp_spell_token (pfile, token, &result[out], 0);
   4737      1.1  mrg       out = last - result;
   4738      1.1  mrg 
   4739      1.1  mrg       token = cpp_get_token (pfile);
   4740      1.1  mrg       if (token->flags & PREV_WHITE)
   4741      1.1  mrg 	result[out++] = ' ';
   4742      1.1  mrg     }
   4743      1.1  mrg 
   4744      1.1  mrg   result[out] = '\0';
   4745      1.1  mrg   return result;
   4746      1.1  mrg }
   4747      1.1  mrg 
   4748      1.1  mrg /* Memory buffers.  Changing these three constants can have a dramatic
   4749      1.1  mrg    effect on performance.  The values here are reasonable defaults,
   4750      1.1  mrg    but might be tuned.  If you adjust them, be sure to test across a
   4751      1.1  mrg    range of uses of cpplib, including heavy nested function-like macro
   4752      1.1  mrg    expansion.  Also check the change in peak memory usage (NJAMD is a
   4753      1.1  mrg    good tool for this).  */
   4754      1.1  mrg #define MIN_BUFF_SIZE 8000
   4755      1.1  mrg #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
   4756      1.1  mrg #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
   4757      1.1  mrg 	(MIN_EXTRA + ((BUFF)->limit - (BUFF)->cur) * 2)
   4758      1.1  mrg 
   4759      1.1  mrg #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
   4760      1.1  mrg   #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
   4761      1.1  mrg #endif
   4762      1.1  mrg 
   4763      1.1  mrg /* Create a new allocation buffer.  Place the control block at the end
   4764      1.1  mrg    of the buffer, so that buffer overflows will cause immediate chaos.  */
   4765      1.1  mrg static _cpp_buff *
   4766      1.1  mrg new_buff (size_t len)
   4767      1.1  mrg {
   4768      1.1  mrg   _cpp_buff *result;
   4769      1.1  mrg   unsigned char *base;
   4770      1.1  mrg 
   4771      1.1  mrg   if (len < MIN_BUFF_SIZE)
   4772      1.1  mrg     len = MIN_BUFF_SIZE;
   4773      1.1  mrg   len = CPP_ALIGN (len);
   4774  1.1.1.3  mrg 
   4775      1.1  mrg #ifdef ENABLE_VALGRIND_WORKAROUNDS
   4776      1.1  mrg   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
   4777      1.1  mrg      struct first.  */
   4778      1.1  mrg   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
   4779      1.1  mrg   base = XNEWVEC (unsigned char, len + slen);
   4780      1.1  mrg   result = (_cpp_buff *) base;
   4781      1.1  mrg   base += slen;
   4782      1.1  mrg #else
   4783      1.1  mrg   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
   4784      1.1  mrg   result = (_cpp_buff *) (base + len);
   4785      1.1  mrg #endif
   4786      1.1  mrg   result->base = base;
   4787      1.1  mrg   result->cur = base;
   4788      1.1  mrg   result->limit = base + len;
   4789      1.1  mrg   result->next = NULL;
   4790      1.1  mrg   return result;
   4791      1.1  mrg }
   4792      1.1  mrg 
   4793      1.1  mrg /* Place a chain of unwanted allocation buffers on the free list.  */
   4794      1.1  mrg void
   4795      1.1  mrg _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
   4796      1.1  mrg {
   4797      1.1  mrg   _cpp_buff *end = buff;
   4798      1.1  mrg 
   4799      1.1  mrg   while (end->next)
   4800      1.1  mrg     end = end->next;
   4801      1.1  mrg   end->next = pfile->free_buffs;
   4802      1.1  mrg   pfile->free_buffs = buff;
   4803      1.1  mrg }
   4804      1.1  mrg 
   4805      1.1  mrg /* Return a free buffer of size at least MIN_SIZE.  */
   4806      1.1  mrg _cpp_buff *
   4807      1.1  mrg _cpp_get_buff (cpp_reader *pfile, size_t min_size)
   4808      1.1  mrg {
   4809      1.1  mrg   _cpp_buff *result, **p;
   4810      1.1  mrg 
   4811      1.1  mrg   for (p = &pfile->free_buffs;; p = &(*p)->next)
   4812      1.1  mrg     {
   4813      1.1  mrg       size_t size;
   4814      1.1  mrg 
   4815      1.1  mrg       if (*p == NULL)
   4816      1.1  mrg 	return new_buff (min_size);
   4817      1.1  mrg       result = *p;
   4818      1.1  mrg       size = result->limit - result->base;
   4819      1.1  mrg       /* Return a buffer that's big enough, but don't waste one that's
   4820      1.1  mrg          way too big.  */
   4821      1.1  mrg       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
   4822      1.1  mrg 	break;
   4823      1.1  mrg     }
   4824      1.1  mrg 
   4825      1.1  mrg   *p = result->next;
   4826      1.1  mrg   result->next = NULL;
   4827      1.1  mrg   result->cur = result->base;
   4828      1.1  mrg   return result;
   4829      1.1  mrg }
   4830      1.1  mrg 
   4831      1.1  mrg /* Creates a new buffer with enough space to hold the uncommitted
   4832      1.1  mrg    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
   4833      1.1  mrg    the excess bytes to the new buffer.  Chains the new buffer after
   4834      1.1  mrg    BUFF, and returns the new buffer.  */
   4835      1.1  mrg _cpp_buff *
   4836      1.1  mrg _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
   4837      1.1  mrg {
   4838      1.1  mrg   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
   4839      1.1  mrg   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
   4840      1.1  mrg 
   4841      1.1  mrg   buff->next = new_buff;
   4842      1.1  mrg   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
   4843      1.1  mrg   return new_buff;
   4844      1.1  mrg }
   4845      1.1  mrg 
   4846      1.1  mrg /* Creates a new buffer with enough space to hold the uncommitted
   4847      1.1  mrg    remaining bytes of the buffer pointed to by BUFF, and at least
   4848      1.1  mrg    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
   4849      1.1  mrg    Chains the new buffer before the buffer pointed to by BUFF, and
   4850      1.1  mrg    updates the pointer to point to the new buffer.  */
   4851      1.1  mrg void
   4852      1.1  mrg _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
   4853      1.1  mrg {
   4854      1.1  mrg   _cpp_buff *new_buff, *old_buff = *pbuff;
   4855      1.1  mrg   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
   4856      1.1  mrg 
   4857      1.1  mrg   new_buff = _cpp_get_buff (pfile, size);
   4858      1.1  mrg   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
   4859      1.1  mrg   new_buff->next = old_buff;
   4860      1.1  mrg   *pbuff = new_buff;
   4861      1.1  mrg }
   4862      1.1  mrg 
   4863      1.1  mrg /* Free a chain of buffers starting at BUFF.  */
   4864      1.1  mrg void
   4865      1.1  mrg _cpp_free_buff (_cpp_buff *buff)
   4866      1.1  mrg {
   4867      1.1  mrg   _cpp_buff *next;
   4868      1.1  mrg 
   4869      1.1  mrg   for (; buff; buff = next)
   4870      1.1  mrg     {
   4871  1.1.1.3  mrg       next = buff->next;
   4872      1.1  mrg #ifdef ENABLE_VALGRIND_WORKAROUNDS
   4873      1.1  mrg       free (buff);
   4874      1.1  mrg #else
   4875      1.1  mrg       free (buff->base);
   4876      1.1  mrg #endif
   4877      1.1  mrg     }
   4878      1.1  mrg }
   4879      1.1  mrg 
   4880      1.1  mrg /* Allocate permanent, unaligned storage of length LEN.  */
   4881      1.1  mrg unsigned char *
   4882      1.1  mrg _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
   4883      1.1  mrg {
   4884      1.1  mrg   _cpp_buff *buff = pfile->u_buff;
   4885      1.1  mrg   unsigned char *result = buff->cur;
   4886      1.1  mrg 
   4887      1.1  mrg   if (len > (size_t) (buff->limit - result))
   4888      1.1  mrg     {
   4889      1.1  mrg       buff = _cpp_get_buff (pfile, len);
   4890      1.1  mrg       buff->next = pfile->u_buff;
   4891      1.1  mrg       pfile->u_buff = buff;
   4892      1.1  mrg       result = buff->cur;
   4893      1.1  mrg     }
   4894      1.1  mrg 
   4895      1.1  mrg   buff->cur = result + len;
   4896      1.1  mrg   return result;
   4897      1.1  mrg }
   4898      1.1  mrg 
   4899      1.1  mrg /* Allocate permanent, unaligned storage of length LEN from a_buff.
   4900      1.1  mrg    That buffer is used for growing allocations when saving macro
   4901      1.1  mrg    replacement lists in a #define, and when parsing an answer to an
   4902      1.1  mrg    assertion in #assert, #unassert or #if (and therefore possibly
   4903      1.1  mrg    whilst expanding macros).  It therefore must not be used by any
   4904      1.1  mrg    code that they might call: specifically the lexer and the guts of
   4905      1.1  mrg    the macro expander.
   4906      1.1  mrg 
   4907      1.1  mrg    All existing other uses clearly fit this restriction: storing
   4908      1.1  mrg    registered pragmas during initialization.  */
   4909      1.1  mrg unsigned char *
   4910      1.1  mrg _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
   4911      1.1  mrg {
   4912      1.1  mrg   _cpp_buff *buff = pfile->a_buff;
   4913      1.1  mrg   unsigned char *result = buff->cur;
   4914      1.1  mrg 
   4915      1.1  mrg   if (len > (size_t) (buff->limit - result))
   4916      1.1  mrg     {
   4917      1.1  mrg       buff = _cpp_get_buff (pfile, len);
   4918      1.1  mrg       buff->next = pfile->a_buff;
   4919      1.1  mrg       pfile->a_buff = buff;
   4920      1.1  mrg       result = buff->cur;
   4921      1.1  mrg     }
   4922      1.1  mrg 
   4923      1.1  mrg   buff->cur = result + len;
   4924      1.1  mrg   return result;
   4925      1.1  mrg }
   4926      1.1  mrg 
   4927      1.1  mrg /* Commit or allocate storage from a buffer.  */
   4928      1.1  mrg 
   4929      1.1  mrg void *
   4930      1.1  mrg _cpp_commit_buff (cpp_reader *pfile, size_t size)
   4931      1.1  mrg {
   4932      1.1  mrg   void *ptr = BUFF_FRONT (pfile->a_buff);
   4933      1.1  mrg 
   4934      1.1  mrg   if (pfile->hash_table->alloc_subobject)
   4935      1.1  mrg     {
   4936      1.1  mrg       void *copy = pfile->hash_table->alloc_subobject (size);
   4937      1.1  mrg       memcpy (copy, ptr, size);
   4938      1.1  mrg       ptr = copy;
   4939      1.1  mrg     }
   4940      1.1  mrg   else
   4941      1.1  mrg     BUFF_FRONT (pfile->a_buff) += size;
   4942      1.1  mrg 
   4943      1.1  mrg   return ptr;
   4944      1.1  mrg }
   4945      1.1  mrg 
   4946      1.1  mrg /* Say which field of TOK is in use.  */
   4947      1.1  mrg 
   4948      1.1  mrg enum cpp_token_fld_kind
   4949      1.1  mrg cpp_token_val_index (const cpp_token *tok)
   4950      1.1  mrg {
   4951      1.1  mrg   switch (TOKEN_SPELL (tok))
   4952      1.1  mrg     {
   4953      1.1  mrg     case SPELL_IDENT:
   4954      1.1  mrg       return CPP_TOKEN_FLD_NODE;
   4955      1.1  mrg     case SPELL_LITERAL:
   4956      1.1  mrg       return CPP_TOKEN_FLD_STR;
   4957      1.1  mrg     case SPELL_OPERATOR:
   4958      1.1  mrg       /* Operands which were originally spelled as ident keep around
   4959      1.1  mrg          the node for the exact spelling.  */
   4960      1.1  mrg       if (tok->flags & NAMED_OP)
   4961      1.1  mrg 	return CPP_TOKEN_FLD_NODE;
   4962      1.1  mrg       else if (tok->type == CPP_PASTE)
   4963      1.1  mrg 	return CPP_TOKEN_FLD_TOKEN_NO;
   4964      1.1  mrg       else
   4965      1.1  mrg 	return CPP_TOKEN_FLD_NONE;
   4966      1.1  mrg     case SPELL_NONE:
   4967      1.1  mrg       if (tok->type == CPP_MACRO_ARG)
   4968      1.1  mrg 	return CPP_TOKEN_FLD_ARG_NO;
   4969      1.1  mrg       else if (tok->type == CPP_PADDING)
   4970      1.1  mrg 	return CPP_TOKEN_FLD_SOURCE;
   4971      1.1  mrg       else if (tok->type == CPP_PRAGMA)
   4972      1.1  mrg 	return CPP_TOKEN_FLD_PRAGMA;
   4973      1.1  mrg       /* fall through */
   4974      1.1  mrg     default:
   4975      1.1  mrg       return CPP_TOKEN_FLD_NONE;
   4976      1.1  mrg     }
   4977      1.1  mrg }
   4978      1.1  mrg 
   4979      1.1  mrg /* All tokens lexed in R after calling this function will be forced to
   4980      1.1  mrg    have their location_t to be P, until
   4981      1.1  mrg    cpp_stop_forcing_token_locations is called for R.  */
   4982      1.1  mrg 
   4983      1.1  mrg void
   4984      1.1  mrg cpp_force_token_locations (cpp_reader *r, location_t loc)
   4985      1.1  mrg {
   4986      1.1  mrg   r->forced_token_location = loc;
   4987      1.1  mrg }
   4988      1.1  mrg 
   4989      1.1  mrg /* Go back to assigning locations naturally for lexed tokens.  */
   4990      1.1  mrg 
   4991      1.1  mrg void
   4992      1.1  mrg cpp_stop_forcing_token_locations (cpp_reader *r)
   4993      1.1  mrg {
   4994      1.1  mrg   r->forced_token_location = 0;
   4995      1.1  mrg }
   4996      1.1  mrg 
   4997      1.1  mrg /* We're looking at \, if it's escaping EOL, look past it.  If at
   4998      1.1  mrg    LIMIT, don't advance.  */
   4999      1.1  mrg 
   5000      1.1  mrg static const unsigned char *
   5001      1.1  mrg do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
   5002      1.1  mrg {
   5003      1.1  mrg   const unsigned char *probe = peek;
   5004      1.1  mrg 
   5005      1.1  mrg   if (__builtin_expect (peek[1] == '\n', true))
   5006      1.1  mrg     {
   5007      1.1  mrg     eol:
   5008      1.1  mrg       probe += 2;
   5009      1.1  mrg       if (__builtin_expect (probe < limit, true))
   5010      1.1  mrg 	{
   5011      1.1  mrg 	  peek = probe;
   5012      1.1  mrg 	  if (*peek == '\\')
   5013      1.1  mrg 	    /* The user might be perverse.  */
   5014      1.1  mrg 	    return do_peek_backslash (peek, limit);
   5015      1.1  mrg 	}
   5016      1.1  mrg     }
   5017      1.1  mrg   else if (__builtin_expect (peek[1] == '\r', false))
   5018      1.1  mrg     {
   5019      1.1  mrg       if (probe[2] == '\n')
   5020      1.1  mrg 	probe++;
   5021      1.1  mrg       goto eol;
   5022      1.1  mrg     }
   5023      1.1  mrg 
   5024      1.1  mrg   return peek;
   5025      1.1  mrg }
   5026      1.1  mrg 
   5027      1.1  mrg static const unsigned char *
   5028      1.1  mrg do_peek_next (const unsigned char *peek, const unsigned char *limit)
   5029      1.1  mrg {
   5030      1.1  mrg   if (__builtin_expect (*peek == '\\', false))
   5031      1.1  mrg     peek = do_peek_backslash (peek, limit);
   5032      1.1  mrg   return peek;
   5033      1.1  mrg }
   5034      1.1  mrg 
   5035      1.1  mrg static const unsigned char *
   5036      1.1  mrg do_peek_prev (const unsigned char *peek, const unsigned char *bound)
   5037      1.1  mrg {
   5038      1.1  mrg   if (peek == bound)
   5039      1.1  mrg     return NULL;
   5040      1.1  mrg 
   5041      1.1  mrg   unsigned char c = *--peek;
   5042      1.1  mrg   if (__builtin_expect (c == '\n', false)
   5043      1.1  mrg       || __builtin_expect (c == 'r', false))
   5044      1.1  mrg     {
   5045      1.1  mrg       if (peek == bound)
   5046      1.1  mrg 	return peek;
   5047      1.1  mrg       int ix = -1;
   5048      1.1  mrg       if (c == '\n' && peek[ix] == '\r')
   5049      1.1  mrg 	{
   5050      1.1  mrg 	  if (peek + ix == bound)
   5051      1.1  mrg 	    return peek;
   5052      1.1  mrg 	  ix--;
   5053      1.1  mrg 	}
   5054      1.1  mrg 
   5055      1.1  mrg       if (peek[ix] == '\\')
   5056      1.1  mrg 	return do_peek_prev (peek + ix, bound);
   5057      1.1  mrg 
   5058      1.1  mrg       return peek;
   5059      1.1  mrg     }
   5060      1.1  mrg   else
   5061      1.1  mrg     return peek;
   5062      1.1  mrg }
   5063      1.1  mrg 
   5064      1.1  mrg /* If PEEK[-1] is identifier MATCH, scan past it and trailing white
   5065      1.1  mrg    space.  Otherwise return NULL.  */
   5066      1.1  mrg 
   5067      1.1  mrg static const unsigned char *
   5068      1.1  mrg do_peek_ident (const char *match, const unsigned char *peek,
   5069      1.1  mrg 	       const unsigned char *limit)
   5070      1.1  mrg {
   5071      1.1  mrg   for (; *++match; peek++)
   5072      1.1  mrg     if (*peek != *match)
   5073      1.1  mrg       {
   5074      1.1  mrg 	peek = do_peek_next (peek, limit);
   5075      1.1  mrg 	if (*peek != *match)
   5076      1.1  mrg 	  return NULL;
   5077      1.1  mrg       }
   5078      1.1  mrg 
   5079      1.1  mrg   /* Must now not be looking at an identifier char.  */
   5080      1.1  mrg   peek = do_peek_next (peek, limit);
   5081      1.1  mrg   if (ISIDNUM (*peek))
   5082      1.1  mrg     return NULL;
   5083      1.1  mrg 
   5084      1.1  mrg   /* Skip control-line whitespace.  */
   5085      1.1  mrg  ws:
   5086      1.1  mrg   while (*peek == ' ' || *peek == '\t')
   5087      1.1  mrg     peek++;
   5088      1.1  mrg   if (__builtin_expect (*peek == '\\', false))
   5089      1.1  mrg     {
   5090      1.1  mrg       peek = do_peek_backslash (peek, limit);
   5091      1.1  mrg       if (*peek != '\\')
   5092      1.1  mrg 	goto ws;
   5093      1.1  mrg     }
   5094      1.1  mrg 
   5095      1.1  mrg   return peek;
   5096      1.1  mrg }
   5097      1.1  mrg 
   5098      1.1  mrg /* Are we looking at a module control line starting as PEEK - 1?  */
   5099      1.1  mrg 
   5100      1.1  mrg static bool
   5101      1.1  mrg do_peek_module (cpp_reader *pfile, unsigned char c,
   5102      1.1  mrg 		const unsigned char *peek, const unsigned char *limit)
   5103      1.1  mrg {
   5104      1.1  mrg   bool import = false;
   5105      1.1  mrg 
   5106      1.1  mrg   if (__builtin_expect (c == 'e', false))
   5107      1.1  mrg     {
   5108      1.1  mrg       if (!((peek[0] == 'x' || peek[0] == '\\')
   5109      1.1  mrg 	    && (peek = do_peek_ident ("export", peek, limit))))
   5110      1.1  mrg 	return false;
   5111      1.1  mrg 
   5112      1.1  mrg       /* export, peek for import or module.  No need to peek __import
   5113      1.1  mrg 	 here.  */
   5114      1.1  mrg       if (peek[0] == 'i')
   5115      1.1  mrg 	{
   5116      1.1  mrg 	  if (!((peek[1] == 'm' || peek[1] == '\\')
   5117      1.1  mrg 		&& (peek = do_peek_ident ("import", peek + 1, limit))))
   5118      1.1  mrg 	    return false;
   5119      1.1  mrg 	  import = true;
   5120      1.1  mrg 	}
   5121      1.1  mrg       else if (peek[0] == 'm')
   5122      1.1  mrg 	{
   5123      1.1  mrg 	  if (!((peek[1] == 'o' || peek[1] == '\\')
   5124      1.1  mrg 		&& (peek = do_peek_ident ("module", peek + 1, limit))))
   5125      1.1  mrg 	    return false;
   5126      1.1  mrg 	}
   5127      1.1  mrg       else
   5128      1.1  mrg 	return false;
   5129      1.1  mrg     }
   5130      1.1  mrg   else if (__builtin_expect (c == 'i', false))
   5131      1.1  mrg     {
   5132      1.1  mrg       if (!((peek[0] == 'm' || peek[0] == '\\')
   5133      1.1  mrg 	    && (peek = do_peek_ident ("import", peek, limit))))
   5134      1.1  mrg 	return false;
   5135      1.1  mrg       import = true;
   5136      1.1  mrg     }
   5137      1.1  mrg   else if (__builtin_expect (c == '_', false))
   5138      1.1  mrg     {
   5139      1.1  mrg       /* Needed for translated includes.   */
   5140      1.1  mrg       if (!((peek[0] == '_' || peek[0] == '\\')
   5141      1.1  mrg 	    && (peek = do_peek_ident ("__import", peek, limit))))
   5142      1.1  mrg 	return false;
   5143      1.1  mrg       import = true;
   5144      1.1  mrg     }
   5145      1.1  mrg   else if (__builtin_expect (c == 'm', false))
   5146      1.1  mrg     {
   5147      1.1  mrg       if (!((peek[0] == 'o' || peek[0] == '\\')
   5148      1.1  mrg 	    && (peek = do_peek_ident ("module", peek, limit))))
   5149      1.1  mrg 	return false;
   5150      1.1  mrg     }
   5151      1.1  mrg   else
   5152      1.1  mrg     return false;
   5153      1.1  mrg 
   5154      1.1  mrg   /* Peek the next character to see if it's good enough.  We'll be at
   5155      1.1  mrg      the first non-whitespace char, including skipping an escaped
   5156      1.1  mrg      newline.  */
   5157      1.1  mrg   /* ... import followed by identifier, ':', '<' or header-name
   5158      1.1  mrg      preprocessing tokens, or module followed by identifier, ':' or
   5159      1.1  mrg      ';' preprocessing tokens.  */
   5160      1.1  mrg   unsigned char p = *peek++;
   5161      1.1  mrg 
   5162      1.1  mrg   /* A character literal is ... single quotes, ... optionally preceded
   5163      1.1  mrg      by u8, u, U, or L */
   5164      1.1  mrg   /* A string-literal is a ... double quotes, optionally prefixed by
   5165      1.1  mrg      R, u8, u8R, u, uR, U, UR, L, or LR */
   5166      1.1  mrg   if (p == 'u')
   5167      1.1  mrg     {
   5168      1.1  mrg       peek = do_peek_next (peek, limit);
   5169      1.1  mrg       if (*peek == '8')
   5170      1.1  mrg 	{
   5171      1.1  mrg 	  peek++;
   5172      1.1  mrg 	  goto peek_u8;
   5173      1.1  mrg 	}
   5174      1.1  mrg       goto peek_u;
   5175      1.1  mrg     }
   5176      1.1  mrg   else if (p == 'U' || p == 'L')
   5177      1.1  mrg     {
   5178      1.1  mrg     peek_u8:
   5179      1.1  mrg       peek = do_peek_next (peek, limit);
   5180      1.1  mrg     peek_u:
   5181      1.1  mrg       if (*peek == '\"' || *peek == '\'')
   5182      1.1  mrg 	return false;
   5183      1.1  mrg 
   5184      1.1  mrg       if (*peek == 'R')
   5185      1.1  mrg 	goto peek_R;
   5186      1.1  mrg       /* Identifier. Ok.  */
   5187      1.1  mrg     }
   5188      1.1  mrg   else if (p == 'R')
   5189      1.1  mrg     {
   5190      1.1  mrg     peek_R:
   5191      1.1  mrg       if (CPP_OPTION (pfile, rliterals))
   5192      1.1  mrg 	{
   5193      1.1  mrg 	  peek = do_peek_next (peek, limit);
   5194      1.1  mrg 	  if (*peek == '\"')
   5195      1.1  mrg 	    return false;
   5196      1.1  mrg 	}
   5197      1.1  mrg       /* Identifier. Ok.  */
   5198      1.1  mrg     }
   5199      1.1  mrg   else if ('Z' - 'A' == 25
   5200      1.1  mrg 	   ? ((p >= 'A' && p <= 'Z') || (p >= 'a' && p <= 'z') || p == '_')
   5201      1.1  mrg 	   : ISIDST (p))
   5202      1.1  mrg     {
   5203      1.1  mrg       /* Identifier.  Ok. */
   5204      1.1  mrg     }
   5205      1.1  mrg   else if (p == '<')
   5206      1.1  mrg     {
   5207      1.1  mrg       /* Maybe angle header, ok for import.  Reject
   5208      1.1  mrg 	 '<=', '<<' digraph:'<:'.  */
   5209      1.1  mrg       if (!import)
   5210      1.1  mrg 	return false;
   5211      1.1  mrg       peek = do_peek_next (peek, limit);
   5212      1.1  mrg       if (*peek == '=' || *peek == '<'
   5213      1.1  mrg 	  || (*peek == ':' && CPP_OPTION (pfile, digraphs)))
   5214      1.1  mrg 	return false;
   5215      1.1  mrg     }
   5216      1.1  mrg   else if (p == ';')
   5217      1.1  mrg     {
   5218      1.1  mrg       /* SEMICOLON, ok for module.  */
   5219      1.1  mrg       if (import)
   5220      1.1  mrg 	return false;
   5221      1.1  mrg     }
   5222      1.1  mrg   else if (p == '"')
   5223      1.1  mrg     {
   5224      1.1  mrg       /* STRING, ok for import.  */
   5225      1.1  mrg       if (!import)
   5226      1.1  mrg 	return false;
   5227      1.1  mrg     }
   5228      1.1  mrg   else if (p == ':')
   5229      1.1  mrg     {
   5230      1.1  mrg       /* Maybe COLON, ok.  Reject '::', digraph:':>'.  */
   5231      1.1  mrg       peek = do_peek_next (peek, limit);
   5232      1.1  mrg       if (*peek == ':' || (*peek == '>' && CPP_OPTION (pfile, digraphs)))
   5233      1.1  mrg 	return false;
   5234      1.1  mrg     }
   5235      1.1  mrg   else
   5236      1.1  mrg     /* FIXME: Detect a unicode character, excluding those not
   5237      1.1  mrg        permitted as the initial character. [lex.name]/1.  I presume
   5238      1.1  mrg        we need to check the \[uU] spellings, and directly using
   5239      1.1  mrg        Unicode in say UTF8 form?  Or perhaps we do the phase-1
   5240      1.1  mrg        conversion of UTF8 to universal-character-names?  */
   5241      1.1  mrg     return false;
   5242      1.1  mrg 
   5243      1.1  mrg   return true;
   5244      1.1  mrg }
   5245      1.1  mrg 
   5246      1.1  mrg /* Directives-only scanning.  Somewhat more relaxed than correct
   5247      1.1  mrg    parsing -- some ill-formed programs will not be rejected.  */
   5248      1.1  mrg 
   5249      1.1  mrg void
   5250      1.1  mrg cpp_directive_only_process (cpp_reader *pfile,
   5251      1.1  mrg 			    void *data,
   5252      1.1  mrg 			    void (*cb) (cpp_reader *, CPP_DO_task, void *, ...))
   5253      1.1  mrg {
   5254      1.1  mrg   bool module_p = CPP_OPTION (pfile, module_directives);
   5255      1.1  mrg 
   5256      1.1  mrg   do
   5257      1.1  mrg     {
   5258      1.1  mrg     restart:
   5259      1.1  mrg       /* Buffer initialization, but no line cleaning. */
   5260      1.1  mrg       cpp_buffer *buffer = pfile->buffer;
   5261      1.1  mrg       buffer->cur_note = buffer->notes_used = 0;
   5262      1.1  mrg       buffer->cur = buffer->line_base = buffer->next_line;
   5263      1.1  mrg       buffer->need_line = false;
   5264      1.1  mrg       /* Files always end in a newline or carriage return.  We rely on this for
   5265      1.1  mrg 	 character peeking safety.  */
   5266      1.1  mrg       gcc_assert (buffer->rlimit[0] == '\n' || buffer->rlimit[0] == '\r');
   5267      1.1  mrg 
   5268      1.1  mrg       const unsigned char *base = buffer->cur;
   5269      1.1  mrg       unsigned line_count = 0;
   5270      1.1  mrg       const unsigned char *line_start = base;
   5271      1.1  mrg 
   5272      1.1  mrg       bool bol = true;
   5273      1.1  mrg       bool raw = false;
   5274      1.1  mrg 
   5275      1.1  mrg       const unsigned char *lwm = base;
   5276      1.1  mrg       for (const unsigned char *pos = base, *limit = buffer->rlimit;
   5277      1.1  mrg 	   pos < limit;)
   5278      1.1  mrg 	{
   5279      1.1  mrg 	  unsigned char c = *pos++;
   5280      1.1  mrg 	  /* This matches the switch in _cpp_lex_direct.  */
   5281      1.1  mrg 	  switch (c)
   5282      1.1  mrg 	    {
   5283      1.1  mrg 	    case ' ': case '\t': case '\f': case '\v':
   5284      1.1  mrg 	      /* Whitespace, do nothing.  */
   5285      1.1  mrg 	      break;
   5286      1.1  mrg 
   5287      1.1  mrg 	    case '\r': /* MAC line ending, or Windows \r\n  */
   5288      1.1  mrg 	      if (*pos == '\n')
   5289      1.1  mrg 		pos++;
   5290      1.1  mrg 	      /* FALLTHROUGH */
   5291      1.1  mrg 
   5292      1.1  mrg 	    case '\n':
   5293      1.1  mrg 	      bol = true;
   5294      1.1  mrg 
   5295      1.1  mrg 	    next_line:
   5296      1.1  mrg 	      CPP_INCREMENT_LINE (pfile, 0);
   5297      1.1  mrg 	      line_count++;
   5298      1.1  mrg 	      line_start = pos;
   5299      1.1  mrg 	      break;
   5300      1.1  mrg 
   5301      1.1  mrg 	    case '\\':
   5302      1.1  mrg 	      /* <backslash><newline> is removed, and doesn't undo any
   5303      1.1  mrg 		 preceeding escape or whatnot.  */
   5304      1.1  mrg 	      if (*pos == '\n')
   5305      1.1  mrg 		{
   5306      1.1  mrg 		  pos++;
   5307      1.1  mrg 		  goto next_line;
   5308      1.1  mrg 		}
   5309      1.1  mrg 	      else if (*pos == '\r')
   5310      1.1  mrg 		{
   5311      1.1  mrg 		  if (pos[1] == '\n')
   5312      1.1  mrg 		    pos++;
   5313      1.1  mrg 		  pos++;
   5314      1.1  mrg 		  goto next_line;
   5315      1.1  mrg 		}
   5316      1.1  mrg 	      goto dflt;
   5317      1.1  mrg 
   5318      1.1  mrg 	    case '#':
   5319      1.1  mrg 	      if (bol)
   5320      1.1  mrg 		{
   5321      1.1  mrg 		  /* Line directive.  */
   5322      1.1  mrg 		  if (pos - 1 > base && !pfile->state.skipping)
   5323      1.1  mrg 		    cb (pfile, CPP_DO_print, data,
   5324      1.1  mrg 			line_count, base, pos - 1 - base);
   5325      1.1  mrg 
   5326      1.1  mrg 		  /* Prep things for directive handling. */
   5327      1.1  mrg 		  buffer->next_line = pos;
   5328      1.1  mrg 		  buffer->need_line = true;
   5329      1.1  mrg 		  bool ok = _cpp_get_fresh_line (pfile);
   5330      1.1  mrg 		  gcc_checking_assert (ok);
   5331      1.1  mrg 
   5332      1.1  mrg 		  /* Ensure proper column numbering for generated
   5333      1.1  mrg 		     error messages. */
   5334      1.1  mrg 		  buffer->line_base -= pos - line_start;
   5335      1.1  mrg 
   5336      1.1  mrg 		  _cpp_handle_directive (pfile, line_start + 1 != pos);
   5337      1.1  mrg 
   5338      1.1  mrg 		  /* Sanitize the line settings.  Duplicate #include's can
   5339      1.1  mrg 		     mess things up. */
   5340      1.1  mrg 		  // FIXME: Necessary?
   5341      1.1  mrg 		  pfile->line_table->highest_location
   5342      1.1  mrg 		    = pfile->line_table->highest_line;
   5343      1.1  mrg 
   5344      1.1  mrg 		  if (!pfile->state.skipping
   5345      1.1  mrg 		      && pfile->buffer->next_line < pfile->buffer->rlimit)
   5346      1.1  mrg 		    cb (pfile, CPP_DO_location, data,
   5347      1.1  mrg 			pfile->line_table->highest_line);
   5348      1.1  mrg 
   5349      1.1  mrg 		  goto restart;
   5350      1.1  mrg 		}
   5351      1.1  mrg 	      goto dflt;
   5352      1.1  mrg 
   5353      1.1  mrg 	    case '/':
   5354      1.1  mrg 	      {
   5355      1.1  mrg 		const unsigned char *peek = do_peek_next (pos, limit);
   5356      1.1  mrg 		if (!(*peek == '/' || *peek == '*'))
   5357      1.1  mrg 		  goto dflt;
   5358      1.1  mrg 
   5359      1.1  mrg 		/* Line or block comment  */
   5360      1.1  mrg 		bool is_block = *peek == '*';
   5361      1.1  mrg 		bool star = false;
   5362      1.1  mrg 		bool esc = false;
   5363      1.1  mrg 		location_t sloc
   5364      1.1  mrg 		  = linemap_position_for_column (pfile->line_table,
   5365      1.1  mrg 						 pos - line_start);
   5366      1.1  mrg 
   5367      1.1  mrg 		while (pos < limit)
   5368      1.1  mrg 		  {
   5369      1.1  mrg 		    char c = *pos++;
   5370      1.1  mrg 		    switch (c)
   5371      1.1  mrg 		      {
   5372      1.1  mrg 		      case '\\':
   5373      1.1  mrg 			esc = true;
   5374      1.1  mrg 			break;
   5375      1.1  mrg 
   5376      1.1  mrg 		      case '\r':
   5377      1.1  mrg 			if (*pos == '\n')
   5378      1.1  mrg 			  pos++;
   5379      1.1  mrg 			/* FALLTHROUGH  */
   5380      1.1  mrg 
   5381      1.1  mrg 		      case '\n':
   5382      1.1  mrg 			{
   5383      1.1  mrg 			  CPP_INCREMENT_LINE (pfile, 0);
   5384      1.1  mrg 			  line_count++;
   5385      1.1  mrg 			  line_start = pos;
   5386      1.1  mrg 			  if (!esc && !is_block)
   5387      1.1  mrg 			    {
   5388      1.1  mrg 			      bol = true;
   5389      1.1  mrg 			      goto done_comment;
   5390      1.1  mrg 			    }
   5391      1.1  mrg 			}
   5392      1.1  mrg 			if (!esc)
   5393      1.1  mrg 			  star = false;
   5394      1.1  mrg 			esc = false;
   5395      1.1  mrg 			break;
   5396      1.1  mrg 
   5397      1.1  mrg 		      case '*':
   5398      1.1  mrg 			if (pos > peek)
   5399      1.1  mrg 			  star = is_block;
   5400      1.1  mrg 			esc = false;
   5401      1.1  mrg 			break;
   5402      1.1  mrg 
   5403      1.1  mrg 		      case '/':
   5404      1.1  mrg 			if (star)
   5405      1.1  mrg 			  goto done_comment;
   5406      1.1  mrg 			/* FALLTHROUGH  */
   5407      1.1  mrg 
   5408      1.1  mrg 		      default:
   5409      1.1  mrg 			star = false;
   5410      1.1  mrg 			esc = false;
   5411      1.1  mrg 			break;
   5412      1.1  mrg 		      }
   5413      1.1  mrg 		  }
   5414      1.1  mrg 		if (pos < limit || is_block)
   5415      1.1  mrg 		  cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
   5416      1.1  mrg 				       "unterminated comment");
   5417      1.1  mrg 	      done_comment:
   5418      1.1  mrg 		lwm = pos;
   5419      1.1  mrg 		break;
   5420      1.1  mrg 	      }
   5421      1.1  mrg 
   5422      1.1  mrg 	    case '\'':
   5423      1.1  mrg 	      if (!CPP_OPTION (pfile, digit_separators))
   5424      1.1  mrg 		goto delimited_string;
   5425      1.1  mrg 
   5426      1.1  mrg 	      /* Possibly a number punctuator.  */
   5427      1.1  mrg 	      if (!ISIDNUM (*do_peek_next (pos, limit)))
   5428      1.1  mrg 		goto delimited_string;
   5429      1.1  mrg 
   5430      1.1  mrg 	      goto quote_peek;
   5431      1.1  mrg 
   5432      1.1  mrg 	    case '\"':
   5433      1.1  mrg 	      if (!CPP_OPTION (pfile, rliterals))
   5434      1.1  mrg 		goto delimited_string;
   5435      1.1  mrg 
   5436      1.1  mrg 	    quote_peek:
   5437      1.1  mrg 	      {
   5438      1.1  mrg 		/* For ' see if it's a number punctuator
   5439      1.1  mrg 		   \.?<digit>(<digit>|<identifier-nondigit>
   5440      1.1  mrg 		   |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
   5441      1.1  mrg 		/* For " see if it's a raw string
   5442      1.1  mrg 		   {U,L,u,u8}R.  This includes CPP_NUMBER detection,
   5443      1.1  mrg 		   because that could be 0e+R.  */
   5444      1.1  mrg 		const unsigned char *peek = pos - 1;
   5445      1.1  mrg 		bool quote_first = c == '"';
   5446      1.1  mrg 		bool quote_eight = false;
   5447      1.1  mrg 		bool maybe_number_start = false;
   5448      1.1  mrg 		bool want_number = false;
   5449      1.1  mrg 
   5450      1.1  mrg 		while ((peek = do_peek_prev (peek, lwm)))
   5451      1.1  mrg 		  {
   5452      1.1  mrg 		    unsigned char p = *peek;
   5453      1.1  mrg 		    if (quote_first)
   5454      1.1  mrg 		      {
   5455      1.1  mrg 			if (!raw)
   5456      1.1  mrg 			  {
   5457      1.1  mrg 			    if (p != 'R')
   5458      1.1  mrg 			      break;
   5459      1.1  mrg 			    raw = true;
   5460      1.1  mrg 			    continue;
   5461      1.1  mrg 			  }
   5462      1.1  mrg 
   5463      1.1  mrg 			quote_first = false;
   5464      1.1  mrg 			if (p == 'L' || p == 'U' || p == 'u')
   5465      1.1  mrg 			  ;
   5466      1.1  mrg 			else if (p == '8')
   5467      1.1  mrg 			  quote_eight = true;
   5468      1.1  mrg 			else
   5469      1.1  mrg 			  goto second_raw;
   5470      1.1  mrg 		      }
   5471      1.1  mrg 		    else if (quote_eight)
   5472      1.1  mrg 		      {
   5473      1.1  mrg 			if (p != 'u')
   5474      1.1  mrg 			  {
   5475      1.1  mrg 			    raw = false;
   5476      1.1  mrg 			    break;
   5477      1.1  mrg 			  }
   5478      1.1  mrg 			quote_eight = false;
   5479      1.1  mrg 		      }
   5480      1.1  mrg 		    else if (c == '"')
   5481      1.1  mrg 		      {
   5482      1.1  mrg 		      second_raw:;
   5483      1.1  mrg 			if (!want_number && ISIDNUM (p))
   5484      1.1  mrg 			  {
   5485      1.1  mrg 			    raw = false;
   5486      1.1  mrg 			    break;
   5487      1.1  mrg 			  }
   5488      1.1  mrg 		      }
   5489      1.1  mrg 
   5490      1.1  mrg 		    if (ISDIGIT (p))
   5491      1.1  mrg 		      maybe_number_start = true;
   5492      1.1  mrg 		    else if (p == '.')
   5493      1.1  mrg 		      want_number = true;
   5494      1.1  mrg 		    else if (ISIDNUM (p))
   5495      1.1  mrg 		      maybe_number_start = false;
   5496      1.1  mrg 		    else if (p == '+' || p == '-')
   5497      1.1  mrg 		      {
   5498      1.1  mrg 			if (const unsigned char *peek_prev
   5499      1.1  mrg 			    = do_peek_prev (peek, lwm))
   5500      1.1  mrg 			  {
   5501      1.1  mrg 			    p = *peek_prev;
   5502      1.1  mrg 			    if (p == 'e' || p == 'E'
   5503      1.1  mrg 				|| p == 'p' || p == 'P')
   5504      1.1  mrg 			      {
   5505      1.1  mrg 				want_number = true;
   5506      1.1  mrg 				maybe_number_start = false;
   5507      1.1  mrg 			      }
   5508      1.1  mrg 			    else
   5509      1.1  mrg 			      break;
   5510      1.1  mrg 			  }
   5511      1.1  mrg 			else
   5512      1.1  mrg 			  break;
   5513      1.1  mrg 		      }
   5514      1.1  mrg 		    else if (p == '\'' || p == '\"')
   5515      1.1  mrg 		      {
   5516      1.1  mrg 			/* If this is lwm, this must be the end of a
   5517      1.1  mrg 			   previous string.  So this is a trailing
   5518      1.1  mrg 			   literal type, (a) if those are allowed,
   5519      1.1  mrg 			     and (b) maybe_start is false.  Otherwise
   5520      1.1  mrg 			     this must be a CPP_NUMBER because we've
   5521      1.1  mrg 			     met another ', and we'd have checked that
   5522      1.1  mrg 			     in its own right.  */
   5523      1.1  mrg 			if (peek == lwm && CPP_OPTION (pfile, uliterals))
   5524      1.1  mrg 			  {
   5525      1.1  mrg 			    if  (!maybe_number_start && !want_number)
   5526      1.1  mrg 			      /* Must be a literal type.  */
   5527      1.1  mrg 			      raw = false;
   5528      1.1  mrg 			  }
   5529      1.1  mrg 			else if (p == '\''
   5530      1.1  mrg 				 && CPP_OPTION (pfile, digit_separators))
   5531      1.1  mrg 			  maybe_number_start = true;
   5532      1.1  mrg 			break;
   5533      1.1  mrg 		      }
   5534      1.1  mrg 		    else if (c == '\'')
   5535      1.1  mrg 		      break;
   5536      1.1  mrg 		    else if (!quote_first && !quote_eight)
   5537      1.1  mrg 		      break;
   5538      1.1  mrg 		  }
   5539      1.1  mrg 
   5540      1.1  mrg 		if (maybe_number_start)
   5541      1.1  mrg 		  {
   5542      1.1  mrg 		    if (c == '\'')
   5543      1.1  mrg 		      /* A CPP NUMBER.  */
   5544      1.1  mrg 		      goto dflt;
   5545      1.1  mrg 		    raw = false;
   5546      1.1  mrg 		  }
   5547      1.1  mrg 
   5548      1.1  mrg 		goto delimited_string;
   5549      1.1  mrg 	      }
   5550      1.1  mrg 
   5551      1.1  mrg 	    delimited_string:
   5552      1.1  mrg 	      {
   5553      1.1  mrg 		/* (Possibly raw) string or char literal.  */
   5554      1.1  mrg 		unsigned char end = c;
   5555      1.1  mrg 		int delim_len = -1;
   5556      1.1  mrg 		const unsigned char *delim = NULL;
   5557      1.1  mrg 		location_t sloc = linemap_position_for_column (pfile->line_table,
   5558      1.1  mrg 							       pos - line_start);
   5559      1.1  mrg 		int esc = 0;
   5560      1.1  mrg 
   5561      1.1  mrg 		if (raw)
   5562      1.1  mrg 		  {
   5563      1.1  mrg 		    /* There can be no line breaks in the delimiter.  */
   5564      1.1  mrg 		    delim = pos;
   5565      1.1  mrg 		    for (delim_len = 0; (c = *pos++) != '('; delim_len++)
   5566      1.1  mrg 		      {
   5567      1.1  mrg 			if (delim_len == 16)
   5568      1.1  mrg 			  {
   5569      1.1  mrg 			    cpp_error_with_line (pfile, CPP_DL_ERROR,
   5570      1.1  mrg 						 sloc, 0,
   5571      1.1  mrg 						 "raw string delimiter"
   5572      1.1  mrg 						 " longer than %d"
   5573      1.1  mrg 						 " characters",
   5574      1.1  mrg 						 delim_len);
   5575      1.1  mrg 			    raw = false;
   5576      1.1  mrg 			    pos = delim;
   5577      1.1  mrg 			    break;
   5578      1.1  mrg 			  }
   5579      1.1  mrg 			if (strchr (") \\\t\v\f\n", c))
   5580      1.1  mrg 			  {
   5581      1.1  mrg 			    cpp_error_with_line (pfile, CPP_DL_ERROR,
   5582      1.1  mrg 						 sloc, 0,
   5583      1.1  mrg 						 "invalid character '%c'"
   5584      1.1  mrg 						 " in raw string"
   5585      1.1  mrg 						 " delimiter", c);
   5586      1.1  mrg 			    raw = false;
   5587      1.1  mrg 			    pos = delim;
   5588      1.1  mrg 			    break;
   5589      1.1  mrg 			  }
   5590      1.1  mrg 			if (pos >= limit)
   5591      1.1  mrg 			  goto bad_string;
   5592      1.1  mrg 		      }
   5593      1.1  mrg 		  }
   5594      1.1  mrg 
   5595      1.1  mrg 		while (pos < limit)
   5596      1.1  mrg 		  {
   5597      1.1  mrg 		    char c = *pos++;
   5598      1.1  mrg 		    switch (c)
   5599      1.1  mrg 		      {
   5600      1.1  mrg 		      case '\\':
   5601      1.1  mrg 			if (!raw)
   5602      1.1  mrg 			  esc++;
   5603      1.1  mrg 			break;
   5604      1.1  mrg 
   5605      1.1  mrg 		      case '\r':
   5606      1.1  mrg 			if (*pos == '\n')
   5607      1.1  mrg 			  pos++;
   5608      1.1  mrg 			/* FALLTHROUGH  */
   5609      1.1  mrg 
   5610      1.1  mrg 		      case '\n':
   5611      1.1  mrg 			{
   5612      1.1  mrg 			  CPP_INCREMENT_LINE (pfile, 0);
   5613      1.1  mrg 			  line_count++;
   5614      1.1  mrg 			  line_start = pos;
   5615      1.1  mrg 			}
   5616      1.1  mrg 			if (esc)
   5617      1.1  mrg 			  esc--;
   5618      1.1  mrg 			break;
   5619      1.1  mrg 
   5620      1.1  mrg 		      case ')':
   5621      1.1  mrg 			if (raw
   5622      1.1  mrg 			    && pos + delim_len + 1 < limit
   5623      1.1  mrg 			    && pos[delim_len] == end
   5624      1.1  mrg 			    && !memcmp (delim, pos, delim_len))
   5625      1.1  mrg 			  {
   5626      1.1  mrg 			    pos += delim_len + 1;
   5627      1.1  mrg 			    raw = false;
   5628      1.1  mrg 			    goto done_string;
   5629      1.1  mrg 			  }
   5630      1.1  mrg 			break;
   5631      1.1  mrg 
   5632      1.1  mrg 		      default:
   5633      1.1  mrg 			if (!raw && !(esc & 1) && c == end)
   5634      1.1  mrg 			  goto done_string;
   5635      1.1  mrg 			esc = 0;
   5636      1.1  mrg 			break;
   5637      1.1  mrg 		      }
   5638      1.1  mrg 		  }
   5639      1.1  mrg 	      bad_string:
   5640      1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
   5641      1.1  mrg 				     "unterminated literal");
   5642      1.1  mrg 
   5643      1.1  mrg 	      done_string:
   5644      1.1  mrg 		raw = false;
   5645      1.1  mrg 		lwm = pos - 1;
   5646      1.1  mrg 	      }
   5647      1.1  mrg 	      goto dflt;
   5648      1.1  mrg 
   5649      1.1  mrg 	    case '_':
   5650      1.1  mrg 	    case 'e':
   5651      1.1  mrg 	    case 'i':
   5652      1.1  mrg 	    case 'm':
   5653      1.1  mrg 	      if (bol && module_p && !pfile->state.skipping
   5654      1.1  mrg 		  && do_peek_module (pfile, c, pos, limit))
   5655      1.1  mrg 		{
   5656      1.1  mrg 		  /* We've seen the start of a module control line.
   5657      1.1  mrg 		     Start up the tokenizer.  */
   5658      1.1  mrg 		  pos--; /* Backup over the first character.  */
   5659      1.1  mrg 
   5660      1.1  mrg 		  /* Backup over whitespace to start of line.  */
   5661      1.1  mrg 		  while (pos > line_start
   5662      1.1  mrg 			 && (pos[-1] == ' ' || pos[-1] == '\t'))
   5663      1.1  mrg 		    pos--;
   5664      1.1  mrg 
   5665      1.1  mrg 		  if (pos > base)
   5666      1.1  mrg 		    cb (pfile, CPP_DO_print, data, line_count, base, pos - base);
   5667      1.1  mrg 
   5668      1.1  mrg 		  /* Prep things for directive handling. */
   5669      1.1  mrg 		  buffer->next_line = pos;
   5670      1.1  mrg 		  buffer->need_line = true;
   5671      1.1  mrg 
   5672      1.1  mrg 		  /* Now get tokens until the PRAGMA_EOL.  */
   5673      1.1  mrg 		  do
   5674      1.1  mrg 		    {
   5675      1.1  mrg 		      location_t spelling;
   5676      1.1  mrg 		      const cpp_token *tok
   5677      1.1  mrg 			= cpp_get_token_with_location (pfile, &spelling);
   5678      1.1  mrg 
   5679      1.1  mrg 		      gcc_assert (pfile->state.in_deferred_pragma
   5680      1.1  mrg 				  || tok->type == CPP_PRAGMA_EOL);
   5681      1.1  mrg 		      cb (pfile, CPP_DO_token, data, tok, spelling);
   5682      1.1  mrg 		    }
   5683      1.1  mrg 		  while (pfile->state.in_deferred_pragma);
   5684      1.1  mrg 
   5685      1.1  mrg 		  if (pfile->buffer->next_line < pfile->buffer->rlimit)
   5686      1.1  mrg 		    cb (pfile, CPP_DO_location, data,
   5687      1.1  mrg 			pfile->line_table->highest_line);
   5688      1.1  mrg 
   5689      1.1  mrg 		  pfile->mi_valid = false;
   5690      1.1  mrg 		  goto restart;
   5691      1.1  mrg 		}
   5692      1.1  mrg 	      goto dflt;
   5693      1.1  mrg 
   5694      1.1  mrg 	    default:
   5695      1.1  mrg 	    dflt:
   5696      1.1  mrg 	      bol = false;
   5697      1.1  mrg 	      pfile->mi_valid = false;
   5698      1.1  mrg 	      break;
   5699      1.1  mrg 	    }
   5700      1.1  mrg 	}
   5701      1.1  mrg 
   5702      1.1  mrg       if (buffer->rlimit > base && !pfile->state.skipping)
   5703      1.1  mrg 	{
   5704      1.1  mrg 	  const unsigned char *limit = buffer->rlimit;
   5705      1.1  mrg 	  /* If the file was not newline terminated, add rlimit, which is
   5706      1.1  mrg 	     guaranteed to point to a newline, to the end of our range.  */
   5707      1.1  mrg 	  if (limit[-1] != '\n')
   5708      1.1  mrg 	    {
   5709      1.1  mrg 	      limit++;
   5710      1.1  mrg 	      CPP_INCREMENT_LINE (pfile, 0);
   5711      1.1  mrg 	      line_count++;
   5712      1.1  mrg 	    }
   5713      1.1  mrg 	  cb (pfile, CPP_DO_print, data, line_count, base, limit - base);
   5714      1.1  mrg 	}
   5715      1.1  mrg 
   5716      1.1  mrg       _cpp_pop_buffer (pfile);
   5717      1.1  mrg     }
   5718      1.1  mrg   while (pfile->buffer);
   5719               }
   5720