Home | History | Annotate | Line # | Download | only in libcpp
lex.cc revision 1.3
      1  1.1  mrg /* CPP Library - lexical analysis.
      2  1.1  mrg    Copyright (C) 2000-2022 Free Software Foundation, Inc.
      3  1.1  mrg    Contributed by Per Bothner, 1994-95.
      4  1.1  mrg    Based on CCCP program by Paul Rubin, June 1986
      5  1.1  mrg    Adapted to ANSI C, Richard Stallman, Jan 1987
      6  1.1  mrg    Broken out to separate file, Zack Weinberg, Mar 2000
      7  1.1  mrg 
      8  1.1  mrg This program is free software; you can redistribute it and/or modify it
      9  1.1  mrg under the terms of the GNU General Public License as published by the
     10  1.1  mrg Free Software Foundation; either version 3, or (at your option) any
     11  1.1  mrg later version.
     12  1.1  mrg 
     13  1.1  mrg This program is distributed in the hope that it will be useful,
     14  1.1  mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  1.1  mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16  1.1  mrg GNU General Public License for more details.
     17  1.1  mrg 
     18  1.1  mrg You should have received a copy of the GNU General Public License
     19  1.1  mrg along with this program; see the file COPYING3.  If not see
     20  1.1  mrg <http://www.gnu.org/licenses/>.  */
     21  1.1  mrg 
     22  1.1  mrg #include "config.h"
     23  1.1  mrg #include "system.h"
     24  1.1  mrg #include "cpplib.h"
     25  1.1  mrg #include "internal.h"
     26  1.1  mrg 
     27  1.1  mrg enum spell_type
                  /* Category recorded for every token type in token_spellings below.
                     SPELL_OPERATOR is what the OP() table macro assigns; the other
                     three are selected by the second argument of TK() through token
                     pasting (SPELL_ ## s).  */
     28  1.1  mrg {
     29  1.1  mrg   SPELL_OPERATOR = 0,
     30  1.1  mrg   SPELL_IDENT,
     31  1.1  mrg   SPELL_LITERAL,
     32  1.1  mrg   SPELL_NONE
     33  1.1  mrg };
     34  1.1  mrg 
     35  1.1  mrg struct token_spelling
                  /* One row of token_spellings: a spelling category plus a name.  */
     36  1.1  mrg {
     37  1.1  mrg   enum spell_type category;
     38  1.1  mrg   const unsigned char *name;
     39  1.1  mrg };
     40  1.1  mrg 
                  /* The six digraph spellings.  How the table is indexed is decided by
                     code that is not part of this excerpt.  */
     41  1.1  mrg static const unsigned char *const digraph_spellings[] =
     42  1.1  mrg { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };
     43  1.1  mrg 
                  /* Build token_spellings from TTYPE_TABLE (defined outside this file).
                     OP(e, s) yields { SPELL_OPERATOR, s }; TK(e, s) yields
                     { SPELL_<s>, "<e>" }, i.e. the stringified enumerator name.  Both
                     helper macros are dropped again right after the table.  */
     44  1.1  mrg #define OP(e, s) { SPELL_OPERATOR, UC s  },
     45  1.1  mrg #define TK(e, s) { SPELL_ ## s,    UC #e },
     46  1.1  mrg static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };
     47  1.1  mrg #undef OP
     48  1.1  mrg #undef TK
     49  1.1  mrg 
                  /* Look up the spelling category / name for TOKEN, indexed by its
                     type field.  */
     50  1.1  mrg #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)
     51  1.1  mrg #define TOKEN_NAME(token) (token_spellings[(token)->type].name)
     52  1.1  mrg 
     53  1.1  mrg static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
     54  1.1  mrg static int skip_line_comment (cpp_reader *);
     55  1.1  mrg static void skip_whitespace (cpp_reader *, cppchar_t);
     56  1.1  mrg static void lex_string (cpp_reader *, cpp_token *, const uchar *);
     57  1.1  mrg static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
     58  1.1  mrg static void store_comment (cpp_reader *, cpp_token *);
     59  1.1  mrg static void create_literal (cpp_reader *, cpp_token *, const uchar *,
     60  1.1  mrg 			    unsigned int, enum cpp_ttype);
     61  1.1  mrg static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
     62  1.1  mrg static int name_p (cpp_reader *, const cpp_string *);
     63  1.1  mrg static tokenrun *next_tokenrun (tokenrun *);
     64  1.1  mrg 
     65  1.1  mrg static _cpp_buff *new_buff (size_t);
     66  1.1  mrg 
     67  1.1  mrg 
     68  1.1  mrg /* Utility routine:
     69  1.1  mrg 
     70  1.1  mrg    Compares the token TOKEN to the NUL-terminated string STRING.
     71  1.1  mrg    Returns 1 if TOKEN is a CPP_NAME whose identifier is spelled exactly
                     STRING, and 0 otherwise.  A TOKEN of any other type yields 0
                     without its value being examined.  */
     72  1.1  mrg int
     73  1.1  mrg cpp_ideq (const cpp_token *token, const char *string)
     74  1.1  mrg {
                    /* Only CPP_NAME tokens are compared; val.node.node is not read for
                       any other token type.  */
     75  1.1  mrg   if (token->type != CPP_NAME)
     76  1.1  mrg     return 0;
     77  1.1  mrg 
                    /* ustrcmp yields 0 on equality, hence the negation.  */
     78  1.1  mrg   return !ustrcmp (NODE_NAME (token->val.node.node), (const uchar *) string);
     79  1.1  mrg }
     80  1.1  mrg 
     81  1.1  mrg /* Record a note TYPE at byte POS into the current cleaned logical
     82  1.1  mrg    line.  The note is appended to BUFFER->notes, which is enlarged
                     first when it is already full.  */
     83  1.1  mrg static void
     84  1.1  mrg add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
     85  1.1  mrg {
                    /* Array full: grow the capacity to 2 * cap + 200 entries, so the
                       very first call (cap == 0) allocates 200 notes.  */
     86  1.1  mrg   if (buffer->notes_used == buffer->notes_cap)
     87  1.1  mrg     {
     88  1.1  mrg       buffer->notes_cap = buffer->notes_cap * 2 + 200;
     89  1.1  mrg       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
     90  1.1  mrg                                   buffer->notes_cap);
     91  1.1  mrg     }
     92  1.1  mrg 
                    /* Fill the next free slot and account for it.  */
     93  1.1  mrg   buffer->notes[buffer->notes_used].pos = pos;
     94  1.1  mrg   buffer->notes[buffer->notes_used].type = type;
     95  1.1  mrg   buffer->notes_used++;
     96  1.1  mrg }
     97  1.1  mrg 
     98  1.1  mrg 
     99  1.1  mrg /* Fast path to find line special characters using optimized character
    101  1.1  mrg    scanning algorithms.  Anything complicated falls back to the slow
    102  1.1  mrg    path below.  Since this loop is very hot it's worth doing these kinds
    103  1.1  mrg    of optimizations.
    104  1.1  mrg 
    105  1.1  mrg    One of the paths through the ifdefs should provide
    106  1.1  mrg 
    107  1.1  mrg      const uchar *search_line_fast (const uchar *s, const uchar *end);
    108  1.1  mrg 
    109  1.1  mrg    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
    110  1.1  mrg    the found character.
    111  1.1  mrg 
    112  1.1  mrg    Note that the last character of the buffer is *always* a newline,
    113  1.1  mrg    as forced by _cpp_convert_input.  This fact can be used to avoid
    114  1.1  mrg    explicitly looking for the end of the buffer.  */
    115  1.1  mrg 
    116  1.1  mrg /* Configure gives us an ifdef test.  */
    117  1.1  mrg #ifndef WORDS_BIGENDIAN
    118  1.1  mrg #define WORDS_BIGENDIAN 0
    119  1.1  mrg #endif
    120  1.1  mrg 
    121  1.1  mrg /* We'd like the largest integer that fits into a register.  There's nothing
    122  1.1  mrg    in <stdint.h> that gives us that.  For most hosts this is unsigned long,
    123  1.1  mrg    but MS decided on an LLP64 model.  Thankfully when building with GCC we
    124  1.1  mrg    can get the "real" word size.  */
    125  1.1  mrg #ifdef __GNUC__
    126  1.1  mrg typedef unsigned int word_type __attribute__((__mode__(__word__)));
    127  1.1  mrg #else
    128  1.1  mrg typedef unsigned long word_type;
    129  1.1  mrg #endif
    130  1.1  mrg 
    131  1.1  mrg /* The code below is only expecting sizes 4 or 8.
    132  1.1  mrg    Die at compile-time if this expectation is violated.  */
    133  1.1  mrg typedef char check_word_type_size
    134  1.1  mrg   [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
    135  1.1  mrg 
    136  1.1  mrg /* Return VAL with the first N bytes (in memory order) forced to zero, a
    137  1.1  mrg    value that won't match one of the interesting characters since NUL is
                     not interesting.  N must be smaller than sizeof (word_type): the
                     shift count is N * 8 bits.  */
    138  1.1  mrg 
    139  1.1  mrg static inline word_type
    140  1.1  mrg acc_char_mask_misalign (word_type val, unsigned int n)
    141  1.1  mrg {
    142  1.1  mrg   word_type mask = -1;
                    /* The first bytes in memory are the most significant ones on a
                       big-endian host and the least significant ones otherwise, so
                       the all-ones mask is shifted in the matching direction.  */
    143  1.1  mrg   if (WORDS_BIGENDIAN)
    144  1.1  mrg     mask >>= n * 8;
    145  1.1  mrg   else
    146  1.1  mrg     mask <<= n * 8;
    147  1.1  mrg   return val & mask;
    148  1.1  mrg }
    149  1.1  mrg 
    150  1.1  mrg /* Return X replicated to all byte positions within WORD_TYPE.  */
    151  1.1  mrg 
    152  1.1  mrg static inline word_type
    153  1.1  mrg acc_char_replicate (uchar x)
    154  1.1  mrg {
    155  1.1  mrg   word_type ret;
    156  1.1  mrg 
    157  1.1  mrg   ret = (x << 24) | (x << 16) | (x << 8) | x;
    158  1.1  mrg   if (sizeof(word_type) == 8)
    159  1.1  mrg     ret = (ret << 16 << 16) | ret;
    160  1.1  mrg   return ret;
    161  1.1  mrg }
    162  1.1  mrg 
    163  1.1  mrg /* Return non-zero if some byte of VAL is (probably) C.  C is XORed
                     against the whole word, so it must hold the wanted character in
                     every byte position.  Outside the Alpha path the answer may be a
                     false positive; acc_char_index is used to confirm it.  */
    164  1.1  mrg 
    165  1.1  mrg static inline word_type
    166  1.1  mrg acc_char_cmp (word_type val, word_type c)
    167  1.1  mrg {
    168  1.1  mrg #if defined(__GNUC__) && defined(__alpha__)
    169  1.1  mrg   /* We can get exact results using a compare-bytes instruction.
    170  1.1  mrg      Get (val == c) via (0 >= (val ^ c)).  */
    171  1.1  mrg   return __builtin_alpha_cmpbge (0, val ^ c);
    172  1.1  mrg #else
                    /* MAGIC ends up as 0x7efefeff (4-byte words) or
                       0x7efefefefefefeff (8-byte words); ~MAGIC therefore selects one
                       "hole" bit just above each of the lower bytes plus the top
                       bit.  */
    173  1.1  mrg   word_type magic = 0x7efefefeU;
    174  1.1  mrg   if (sizeof(word_type) == 8)
    175  1.1  mrg     magic = (magic << 16 << 16) | 0xfefefefeU;
    176  1.1  mrg   magic |= 1;
    177  1.1  mrg 
                    /* Bytes of VAL equal to the byte in C become zero here.  The
                       add/xor/mask below is the usual carry-based zero-byte test: a
                       hole bit that did not change under the addition received no
                       carry, which hints at a zero byte beneath it.  */
    178  1.1  mrg   val ^= c;
    179  1.1  mrg   return ((val + magic) ^ ~val) & ~magic;
    180  1.1  mrg #endif
    181  1.1  mrg }
    182  1.1  mrg 
    183  1.1  mrg /* Given the result of acc_char_cmp is non-zero, return the index of
    184  1.1  mrg    the found character.  If this was a false positive, return -1.
                     CMP is the combined acc_char_cmp result and VAL the word that was
                     tested.  The returned index counts bytes in memory order.  Only
                     the little-endian Alpha path looks at CMP; the generic path
                     ignores it and re-examines VAL byte by byte.  */
    185  1.1  mrg 
    186  1.1  mrg static inline int
    187  1.1  mrg acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
    188  1.1  mrg 		word_type val ATTRIBUTE_UNUSED)
    189  1.1  mrg {
    190  1.1  mrg #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
    191  1.1  mrg   /* The cmpbge instruction sets *bits* of the result corresponding to
    192  1.1  mrg      matches in the bytes with no false positives.  */
    193  1.1  mrg   return __builtin_ctzl (cmp);
    194  1.1  mrg #else
    195  1.1  mrg   unsigned int i;
    196  1.1  mrg 
    197  1.1  mrg   /* ??? It would be nice to force unrolling here,
    198  1.1  mrg      and have all of these constants folded.  */
    199  1.1  mrg   for (i = 0; i < sizeof(word_type); ++i)
    200  1.1  mrg     {
    201  1.1  mrg       uchar c;
                        /* Extract byte I in memory order: counted from the most
                           significant end on big-endian hosts, from the least
                           significant end otherwise.  */
    202  1.1  mrg       if (WORDS_BIGENDIAN)
    203  1.1  mrg 	c = (val >> (sizeof(word_type) - i - 1) * 8) & 0xff;
    204  1.1  mrg       else
    205  1.1  mrg 	c = (val >> i * 8) & 0xff;
    206  1.1  mrg 
                        /* The first byte that really is one of the four searched
                           characters wins.  */
    207  1.1  mrg       if (c == '\n' || c == '\r' || c == '\\' || c == '?')
    208  1.1  mrg 	return i;
    209  1.1  mrg     }
    210  1.1  mrg 
                    /* No byte matched: the acc_char_cmp hit was a false positive.  */
    211  1.1  mrg   return -1;
    212  1.1  mrg #endif
    213  1.1  mrg }
    214  1.1  mrg 
    215  1.1  mrg /* A version of the fast scanner using bit fiddling techniques.
    216  1.1  mrg 
    217  1.1  mrg    For 32-bit words, one would normally perform 16 comparisons and
    218  1.1  mrg    16 branches.  With this algorithm one performs 24 arithmetic
    219  1.1  mrg    operations and one branch.  Whether this is faster with a 32-bit
    220  1.1  mrg    word size is going to be somewhat system dependent.
    221  1.1  mrg 
    222  1.1  mrg    For 64-bit words, we eliminate twice the number of comparisons
    223  1.1  mrg    and branches without increasing the number of arithmetic operations.
    224  1.1  mrg    It's almost certainly going to be a win with 64-bit word size.
                  
                     Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
                     is never consulted: the loop stops only on a match, so the caller
                     must guarantee that one exists (this file states that the buffer
                     always ends in a newline).  */
    225  1.1  mrg 
    226  1.1  mrg static const uchar * search_line_acc_char (const uchar *, const uchar *)
    227  1.1  mrg   ATTRIBUTE_UNUSED;
    228  1.1  mrg 
    229  1.1  mrg static const uchar *
    230  1.1  mrg search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    231  1.1  mrg {
                    /* Each searched character replicated into every byte of a word.  */
    232  1.1  mrg   const word_type repl_nl = acc_char_replicate ('\n');
    233  1.1  mrg   const word_type repl_cr = acc_char_replicate ('\r');
    234  1.1  mrg   const word_type repl_bs = acc_char_replicate ('\\');
    235  1.1  mrg   const word_type repl_qm = acc_char_replicate ('?');
    236  1.1  mrg 
    237  1.1  mrg   unsigned int misalign;
    238  1.1  mrg   const word_type *p;
    239  1.1  mrg   word_type val, t;
    240  1.1  mrg 
    241  1.1  mrg   /* Align the buffer.  Mask out any bytes from before the beginning.  */
                    /* The first load starts at the word boundary at or below S, so it
                       may include bytes that precede S; those are zeroed below.  */
    242  1.1  mrg   p = (word_type *)((uintptr_t)s & -sizeof(word_type));
    243  1.1  mrg   val = *p;
    244  1.1  mrg   misalign = (uintptr_t)s & (sizeof(word_type) - 1);
    245  1.1  mrg   if (misalign)
    246  1.1  mrg     val = acc_char_mask_misalign (val, misalign);
    247  1.1  mrg 
    248  1.1  mrg   /* Main loop.  */
    249  1.1  mrg   while (1)
    250  1.1  mrg     {
                        /* T is non-zero if any byte of VAL may be one of the four
                           characters.  */
    251  1.1  mrg       t  = acc_char_cmp (val, repl_nl);
    252  1.1  mrg       t |= acc_char_cmp (val, repl_cr);
    253  1.1  mrg       t |= acc_char_cmp (val, repl_bs);
    254  1.1  mrg       t |= acc_char_cmp (val, repl_qm);
    255  1.1  mrg 
    256  1.1  mrg       if (__builtin_expect (t != 0, 0))
    257  1.1  mrg 	{
                  	  /* Confirm the hit; a negative index means a false positive
                  	     and scanning simply continues with the next word.  */
    258  1.1  mrg 	  int i = acc_char_index (t, val);
    259  1.1  mrg 	  if (i >= 0)
    260  1.1  mrg 	    return (const uchar *)p + i;
    261  1.1  mrg 	}
    262  1.1  mrg 
    263  1.1  mrg       val = *++p;
    264  1.1  mrg     }
    265  1.1  mrg }
    266  1.1  mrg 
    267  1.1  mrg /* Disable on Solaris 2/x86 until the following problem can be properly
    268  1.1  mrg    autoconfed:
    269  1.1  mrg 
    270  1.1  mrg    The Solaris 10+ assembler tags objects with the instruction set
    271  1.1  mrg    extensions used, so SSE4.2 executables cannot run on machines that
    272  1.1  mrg    don't support that extension.  */
    273  1.1  mrg 
    274  1.1  mrg #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
    275  1.1  mrg 
    276  1.1  mrg /* Replicated character data to be shared between implementations.
    277  1.1  mrg    Recall that outside of a context with vector support we can't
    278  1.1  mrg    define compatible vector types, therefore these are all defined
    279  1.1  mrg    in terms of raw characters.  */
    280  1.1  mrg static const char repl_chars[4][16] __attribute__((aligned(16))) = {
    281  1.1  mrg   { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    282  1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
    283  1.1  mrg   { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    284  1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
    285  1.1  mrg   { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    286  1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
    287  1.1  mrg   { '?', '?', '?', '?', '?', '?', '?', '?',
    288  1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?' },
    289  1.1  mrg };
    290  1.1  mrg 
    291  1.1  mrg /* A version of the fast scanner using MMX vectorized byte compare insns.
    292  1.1  mrg 
    293  1.1  mrg    This uses the PMOVMSKB instruction which was introduced with "MMX2",
    294  1.1  mrg    which was packaged into SSE1; it is also present in the AMD MMX
    295  1.1  mrg    extension.  Mark the function as using "sse" so that we emit a real
    296  1.1  mrg    "emms" instruction, rather than the 3dNOW "femms" instruction.
                  
                     Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
                     is unused; the loop ends only when a match is found.  */
    297  1.1  mrg 
    298  1.1  mrg static const uchar *
    299  1.1  mrg #ifndef __SSE__
    300  1.1  mrg __attribute__((__target__("sse")))
    301  1.1  mrg #endif
    302  1.1  mrg search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    303  1.1  mrg {
    304  1.1  mrg   typedef char v8qi __attribute__ ((__vector_size__ (8)));
    305  1.1  mrg   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
    306  1.1  mrg 
                    /* Only the first 8 bytes of each 16-byte repl_chars row are
                       loaded here.  */
    307  1.1  mrg   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
    308  1.1  mrg   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
    309  1.1  mrg   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
    310  1.1  mrg   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
    311  1.1  mrg 
    312  1.1  mrg   unsigned int misalign, found, mask;
    313  1.1  mrg   const v8qi *p;
    314  1.1  mrg   v8qi data, t, c;
    315  1.1  mrg 
    316  1.1  mrg   /* Align the source pointer.  While MMX doesn't generate unaligned data
    317  1.1  mrg      faults, this allows us to safely scan to the end of the buffer without
    318  1.1  mrg      reading beyond the end of the last page.  */
    319  1.1  mrg   misalign = (uintptr_t)s & 7;
    320  1.1  mrg   p = (const v8qi *)((uintptr_t)s & -8);
    321  1.1  mrg   data = *p;
    322  1.1  mrg 
    323  1.1  mrg   /* Create a mask for the bytes that are valid within the first
    324  1.1  mrg      8-byte block.  The Idea here is that the AND with the mask
    325  1.1  mrg      within the loop is "free", since we need some AND or TEST
    326  1.1  mrg      insn in order to set the flags for the branch anyway.  */
    327  1.1  mrg   mask = -1u << misalign;
    328  1.1  mrg 
    329  1.1  mrg   /* Main loop processing 8 bytes at a time.  */
                    /* The first block was loaded above, so the loop is entered at
                       START, past the load and past the mask reset.  */
    330  1.1  mrg   goto start;
    331  1.1  mrg   do
    332  1.1  mrg     {
    333  1.1  mrg       data = *++p;
    334  1.1  mrg       mask = -1;
    335  1.1  mrg 
    336  1.1  mrg     start:
                        /* OR together the byte-wise equality results for the four
                           characters, then collapse them to one bit per byte.  */
    337  1.1  mrg       t = __builtin_ia32_pcmpeqb(data, repl_nl);
    338  1.1  mrg       c = __builtin_ia32_pcmpeqb(data, repl_cr);
    339  1.1  mrg       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
    340  1.1  mrg       c = __builtin_ia32_pcmpeqb(data, repl_bs);
    341  1.1  mrg       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
    342  1.1  mrg       c = __builtin_ia32_pcmpeqb(data, repl_qm);
    343  1.1  mrg       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
    344  1.1  mrg       found = __builtin_ia32_pmovmskb (t);
    345  1.1  mrg       found &= mask;
    346  1.1  mrg     }
    347  1.1  mrg   while (!found);
    348  1.1  mrg 
                    /* Leave MMX mode before returning to ordinary code.  */
    349  1.1  mrg   __builtin_ia32_emms ();
    350  1.1  mrg 
    351  1.1  mrg   /* FOUND contains 1 in bits for which we matched a relevant
    352  1.1  mrg      character.  Conversion to the byte index is trivial.  */
    353  1.1  mrg   found = __builtin_ctz(found);
    354  1.1  mrg   return (const uchar *)p + found;
    355  1.1  mrg }
    356  1.1  mrg 
    357  1.1  mrg /* A version of the fast scanner using SSE2 vectorized byte compare insns.
                     Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
                     is unused; the loop ends only when a match is found.  */
    358  1.1  mrg 
    359  1.1  mrg static const uchar *
    360  1.1  mrg #ifndef __SSE2__
    361  1.1  mrg __attribute__((__target__("sse2")))
    362  1.1  mrg #endif
    363  1.1  mrg search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    364  1.1  mrg {
    365  1.1  mrg   typedef char v16qi __attribute__ ((__vector_size__ (16)));
    366  1.1  mrg 
    367  1.1  mrg   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
    368  1.1  mrg   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
    369  1.1  mrg   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
    370  1.1  mrg   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
    371  1.1  mrg 
    372  1.1  mrg   unsigned int misalign, found, mask;
    373  1.1  mrg   const v16qi *p;
    374  1.1  mrg   v16qi data, t;
    375  1.1  mrg 
    376  1.1  mrg   /* Align the source pointer.  */
                    /* The aligned first load may include bytes that precede S; MASK
                       removes their match bits below.  */
    377  1.1  mrg   misalign = (uintptr_t)s & 15;
    378  1.1  mrg   p = (const v16qi *)((uintptr_t)s & -16);
    379  1.1  mrg   data = *p;
    380  1.1  mrg 
    381  1.1  mrg   /* Create a mask for the bytes that are valid within the first
    382  1.1  mrg      16-byte block.  The Idea here is that the AND with the mask
    383  1.1  mrg      within the loop is "free", since we need some AND or TEST
    384  1.1  mrg      insn in order to set the flags for the branch anyway.  */
    385  1.1  mrg   mask = -1u << misalign;
    386  1.1  mrg 
    387  1.1  mrg   /* Main loop processing 16 bytes at a time.  */
                    /* The first block was loaded above, so the loop is entered at
                       START, past the load and past the mask reset.  */
    388  1.1  mrg   goto start;
    389  1.1  mrg   do
    390  1.1  mrg     {
    391  1.1  mrg       data = *++p;
    392  1.1  mrg       mask = -1;
    393  1.1  mrg 
    394  1.1  mrg     start:
                        /* Vector == compares element-wise; the results are merged and
                           then collapsed to one bit per byte.  */
    395  1.1  mrg       t  = data == repl_nl;
    396  1.1  mrg       t |= data == repl_cr;
    397  1.1  mrg       t |= data == repl_bs;
    398  1.1  mrg       t |= data == repl_qm;
    399  1.1  mrg       found = __builtin_ia32_pmovmskb128 (t);
    400  1.1  mrg       found &= mask;
    401  1.1  mrg     }
    402  1.1  mrg   while (!found);
    403  1.1  mrg 
    404  1.1  mrg   /* FOUND contains 1 in bits for which we matched a relevant
    405  1.1  mrg      character.  Conversion to the byte index is trivial.  */
    406  1.1  mrg   found = __builtin_ctz(found);
    407  1.1  mrg   return (const uchar *)p + found;
    408  1.1  mrg }
    409  1.1  mrg 
    410  1.1  mrg #ifdef HAVE_SSE4
    411  1.1  mrg /* A version of the fast scanner using SSE 4.2 vectorized string insns.
                     Returns a pointer to the first \n, \r, ? or \\ at or after S.  END
                     is used only to decide whether an unaligned 16-byte read at S is
                     safe; the main loop itself stops only on a match.  */
    412  1.1  mrg 
    413  1.1  mrg static const uchar *
    414  1.1  mrg #ifndef __SSE4_2__
    415  1.1  mrg __attribute__((__target__("sse4.2")))
    416  1.1  mrg #endif
    417  1.1  mrg search_line_sse42 (const uchar *s, const uchar *end)
    418  1.1  mrg {
    419  1.1  mrg   typedef char v16qi __attribute__ ((__vector_size__ (16)));
                    /* The four characters to look for; the PCMPESTRI calls below pass
                       4 as the length of this set.  */
    420  1.1  mrg   static const v16qi search = { '\n', '\r', '?', '\\' };
    421  1.1  mrg 
    422  1.1  mrg   uintptr_t si = (uintptr_t)s;
    423  1.1  mrg   uintptr_t index;
    424  1.1  mrg 
    425  1.1  mrg   /* Check for unaligned input.  */
    426  1.1  mrg   if (si & 15)
    427  1.1  mrg     {
    428  1.1  mrg       v16qi sv;
    429  1.1  mrg 
                        /* NOTE(review): the 0xfff / 0xff0 test assumes 4 KiB pages --
                           confirm for hosts with other page sizes.  */
    430  1.1  mrg       if (__builtin_expect (end - s < 16, 0)
    431  1.1  mrg 	  && __builtin_expect ((si & 0xfff) > 0xff0, 0))
    432  1.1  mrg 	{
    433  1.1  mrg 	  /* There are less than 16 bytes left in the buffer, and less
    434  1.1  mrg 	     than 16 bytes left on the page.  Reading 16 bytes at this
    435  1.1  mrg 	     point might generate a spurious page fault.  Defer to the
    436  1.1  mrg 	     SSE2 implementation, which already handles alignment.  */
    437  1.1  mrg 	  return search_line_sse2 (s, end);
    438  1.1  mrg 	}
    439  1.1  mrg 
    440  1.1  mrg       /* ??? The builtin doesn't understand that the PCMPESTRI read from
    441  1.1  mrg 	 memory need not be aligned.  */
    442  1.1  mrg       sv = __builtin_ia32_loaddqu ((const char *) s);
    443  1.1  mrg       index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
    444  1.1  mrg 
                        /* An index below 16 is a match inside this first, unaligned
                           block; S is still the original pointer at FOUND.  */
    445  1.1  mrg       if (__builtin_expect (index < 16, 0))
    446  1.1  mrg 	goto found;
    447  1.1  mrg 
    448  1.1  mrg       /* Advance the pointer to an aligned address.  We will re-scan a
    449  1.1  mrg 	 few bytes, but we no longer need care for reading past the
    450  1.1  mrg 	 end of a page, since we're guaranteed a match.  */
    451  1.1  mrg       s = (const uchar *)((si + 15) & -16);
    452  1.1  mrg     }
    453  1.1  mrg 
    454  1.1  mrg   /* Main loop, processing 16 bytes at a time.  */
    455  1.1  mrg #ifdef __GCC_ASM_FLAG_OUTPUTS__
    456  1.1  mrg   while (1)
    457  1.1  mrg     {
    458  1.1  mrg       char f;
    459  1.1  mrg 
    460  1.1  mrg       /* By using inline assembly instead of the builtin,
    461  1.1  mrg 	 we can use the result, as well as the flags set.  */
                        /* INDEX comes back in ECX and F receives the carry flag; the
                           loop treats a set carry as "match found".  */
    462  1.1  mrg       __asm ("%vpcmpestri\t$0, %2, %3"
    463  1.1  mrg 	     : "=c"(index), "=@ccc"(f)
    464  1.1  mrg 	     : "m"(*s), "x"(search), "a"(4), "d"(16));
    465  1.1  mrg       if (f)
    466  1.1  mrg 	break;
    467  1.1  mrg 
    468  1.1  mrg       s += 16;
    469  1.1  mrg     }
    470  1.1  mrg #else
                    /* The asm loop adds 16 to S before every compare, so start one
                       block early.  */
    471  1.1  mrg   s -= 16;
    472  1.1  mrg   /* By doing the whole loop in inline assembly,
    473  1.1  mrg      we can make proper use of the flags set.  */
    474  1.1  mrg   __asm (      ".balign 16\n"
    475  1.1  mrg 	"0:	add $16, %1\n"
    476  1.1  mrg 	"	%vpcmpestri\t$0, (%1), %2\n"
    477  1.1  mrg 	"	jnc 0b"
    478  1.1  mrg 	: "=&c"(index), "+r"(s)
    479  1.1  mrg 	: "x"(search), "a"(4), "d"(16));
    480  1.1  mrg #endif
    481  1.1  mrg 
                   /* S is the start of the 16-byte block holding the match and INDEX
                      the offset of the match within it.  */
    482  1.1  mrg  found:
    483  1.1  mrg   return s + index;
    484  1.1  mrg }
    485  1.1  mrg 
    486  1.1  mrg #else
    487  1.1  mrg /* Work around out-dated assemblers without sse4 support.  */
    488  1.1  mrg #define search_line_sse42 search_line_sse2
    489  1.1  mrg #endif
    490  1.1  mrg 
    491  1.1  mrg /* Check the CPU capabilities.  */
    492  1.1  mrg 
    493  1.1  mrg #include "../gcc/config/i386/cpuid.h"
    494  1.1  mrg 
    495  1.1  mrg typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
    496  1.1  mrg static search_line_fast_type search_line_fast;
    497  1.1  mrg 
    498  1.1  mrg #define HAVE_init_vectorized_lexer 1
    499  1.1  mrg static inline void
    500  1.1  mrg init_vectorized_lexer (void)
    501  1.1  mrg {
                    /* Choose the search_line_fast implementation for this host and
                       store it in the file-scope function pointer.  MINIMUM encodes
                       what the compile-time target already guarantees (0 none, 1 SSE,
                       2 SSE2, 3 SSE4.2); CPUID may raise the choice above that.  The
                       bit-twiddling scanner is the fallback.  */
    502  1.1  mrg   unsigned dummy, ecx = 0, edx = 0;
    503  1.1  mrg   search_line_fast_type impl = search_line_acc_char;
    504  1.1  mrg   int minimum = 0;
    505  1.1  mrg 
    506  1.1  mrg #if defined(__SSE4_2__)
    507  1.1  mrg   minimum = 3;
    508  1.1  mrg #elif defined(__SSE2__)
    509  1.1  mrg   minimum = 2;
    510  1.1  mrg #elif defined(__SSE__)
    511  1.1  mrg   minimum = 1;
    512  1.1  mrg #endif
    513  1.1  mrg 
                    /* SSE4.2 guaranteed at build time: no CPUID query needed.  */
    514  1.1  mrg   if (minimum == 3)
    515  1.1  mrg     impl = search_line_sse42;
                    /* Standard CPUID leaf 1.  "|| minimum == 2" enters this branch
                       even if the query fails; ECX and EDX are then still 0, so the
                       SSE2 scanner is selected through MINIMUM alone.  */
    516  1.1  mrg   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
    517  1.1  mrg     {
                        /* NOTE(review): "minimum == 3" cannot be true here because
                           that case was handled by the first branch above.  */
    518  1.1  mrg       if (minimum == 3 || (ecx & bit_SSE4_2))
    519  1.1  mrg         impl = search_line_sse42;
    520  1.1  mrg       else if (minimum == 2 || (edx & bit_SSE2))
    521  1.1  mrg 	impl = search_line_sse2;
    522  1.1  mrg       else if (minimum == 1 || (edx & bit_SSE))
    523  1.1  mrg 	impl = search_line_mmx;
    524  1.1  mrg     }
                    /* Leaf 1 unavailable: try the extended leaf, where MMXEXT together
                       with CMOV (or a build-time SSE guarantee) selects the MMX
                       scanner.  */
    525  1.1  mrg   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
    526  1.1  mrg     {
    527  1.1  mrg       if (minimum == 1
    528  1.1  mrg 	  || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
    529  1.1  mrg 	impl = search_line_mmx;
    530  1.1  mrg     }
    531  1.1  mrg 
    532  1.1  mrg   search_line_fast = impl;
    533  1.1  mrg }
    534  1.1  mrg 
    535  1.1  mrg #elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
    536  1.1  mrg 
    537  1.1  mrg /* A version of the fast scanner using AltiVec vectorized byte compares
    538  1.1  mrg    and VSX unaligned loads (when VSX is available).  This is otherwise
    539  1.1  mrg    the same as the AltiVec version.
                  
                     Returns a pointer to the first \n, \r, \\ or ? at or after S.  END
                     is unused; the loop ends only when a match is found.  */
    540  1.1  mrg 
    541  1.1  mrg ATTRIBUTE_NO_SANITIZE_UNDEFINED
    542  1.1  mrg static const uchar *
    543  1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    544  1.1  mrg {
    545  1.1  mrg   typedef __attribute__((altivec(vector))) unsigned char vc;
    546  1.1  mrg 
    547  1.1  mrg   const vc repl_nl = {
    548  1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    549  1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
    550  1.1  mrg   };
    551  1.1  mrg   const vc repl_cr = {
    552  1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    553  1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
    554  1.1  mrg   };
    555  1.1  mrg   const vc repl_bs = {
    556  1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    557  1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
    558  1.1  mrg   };
    559  1.1  mrg   const vc repl_qm = {
    560  1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
    561  1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
    562  1.1  mrg   };
    563  1.1  mrg   const vc zero = { 0 };
    564  1.1  mrg 
    565  1.1  mrg   vc data, t;
    566  1.1  mrg 
    567  1.1  mrg   /* Main loop processing 16 bytes at a time.  */
                    /* Blocks are loaded starting at S itself, with no alignment step.
                       NOTE(review): the last load can therefore extend past the
                       matching byte -- confirm the buffer padding makes that safe.  */
    568  1.1  mrg   do
    569  1.1  mrg     {
    570  1.1  mrg       vc m_nl, m_cr, m_bs, m_qm;
    571  1.1  mrg 
    572  1.1  mrg       data = __builtin_vec_vsx_ld (0, s);
    573  1.1  mrg       s += 16;
    574  1.1  mrg 
    575  1.1  mrg       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
    576  1.1  mrg       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
    577  1.1  mrg       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
    578  1.1  mrg       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
    579  1.1  mrg       t = (m_nl | m_cr) | (m_bs | m_qm);
    580  1.1  mrg 
    581  1.1  mrg       /* T now contains 0xff in bytes for which we matched one of the relevant
    582  1.1  mrg 	 characters.  We want to exit the loop if any byte in T is non-zero.
    583  1.1  mrg 	 Below is the expansion of vec_any_ne(t, zero).  */
    584  1.1  mrg     }
    585  1.1  mrg   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
    586  1.1  mrg 
    587  1.1  mrg   /* Restore s to point to the 16 bytes we just processed.  */
    588  1.1  mrg   s -= 16;
    589  1.1  mrg 
    590  1.1  mrg   {
                      /* N is the number of longs in one vector: 2 when long is 8
                         bytes, 4 when it is 4 bytes.  */
    591  1.1  mrg #define N  (sizeof(vc) / sizeof(long))
    592  1.1  mrg 
    593  1.1  mrg     union {
    594  1.1  mrg       vc v;
    595  1.1  mrg       /* Statically assert that N is 2 or 4.  */
    596  1.1  mrg       unsigned long l[(N == 2 || N == 4) ? N : -1];
    597  1.1  mrg     } u;
    598  1.1  mrg     unsigned long l, i = 0;
    599  1.1  mrg 
    600  1.1  mrg     u.v = t;
    601  1.1  mrg 
    602  1.1  mrg     /* Find the first word of T that is non-zero.  */
                      /* S advances by one long for each all-zero word skipped.  The
                         final word is taken without a test because the loop above
                         only exits once some byte of T is non-zero.  */
    603  1.1  mrg     switch (N)
    604  1.1  mrg       {
    605  1.1  mrg       case 4:
    606  1.1  mrg 	l = u.l[i++];
    607  1.1  mrg 	if (l != 0)
    608  1.1  mrg 	  break;
    609  1.1  mrg 	s += sizeof(unsigned long);
    610  1.1  mrg 	l = u.l[i++];
    611  1.1  mrg 	if (l != 0)
    612  1.1  mrg 	  break;
    613  1.1  mrg 	s += sizeof(unsigned long);
    614  1.1  mrg 	/* FALLTHRU */
    615  1.1  mrg       case 2:
    616  1.1  mrg 	l = u.l[i++];
    617  1.1  mrg 	if (l != 0)
    618  1.1  mrg 	  break;
    619  1.1  mrg 	s += sizeof(unsigned long);
    620  1.1  mrg 	l = u.l[i];
    621  1.1  mrg       }
    622  1.1  mrg 
    623  1.1  mrg     /* L now contains 0xff in bytes for which we matched one of the
    624  1.1  mrg        relevant characters.  We can find the byte index by finding
    625  1.1  mrg        its bit index and dividing by 8.  */
                      /* The first byte in memory is the most significant one on
                         big-endian (count leading zeros) and the least significant
                         one on little-endian (count trailing zeros).  */
    626  1.1  mrg #ifdef __BIG_ENDIAN__
    627  1.1  mrg     l = __builtin_clzl(l) >> 3;
    628  1.1  mrg #else
    629  1.1  mrg     l = __builtin_ctzl(l) >> 3;
    630  1.1  mrg #endif
    631  1.1  mrg     return s + l;
    632  1.1  mrg 
    633  1.1  mrg #undef N
    634  1.1  mrg   }
    635  1.1  mrg }
    636  1.1  mrg 
    637  1.1  mrg #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
    638  1.1  mrg 
    639  1.1  mrg /* A version of the fast scanner using AltiVec vectorized byte compares.
    640  1.1  mrg    This cannot be used for little endian because vec_lvsl/lvsr are
    641  1.1  mrg    deprecated for little endian and the code won't work properly.  */
    642  1.1  mrg /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
    643  1.1  mrg    so we can't compile this function without -maltivec on the command line
    644  1.1  mrg    (or implied by some other switch).  */
    645  1.1  mrg 
    646  1.1  mrg static const uchar *
    647  1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
    648  1.1  mrg {
    649  1.1  mrg   typedef __attribute__((altivec(vector))) unsigned char vc;
    650  1.1  mrg 
    651  1.1  mrg   const vc repl_nl = {
    652  1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
    653  1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
    654  1.1  mrg   };
    655  1.1  mrg   const vc repl_cr = {
    656  1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
    657  1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
    658  1.1  mrg   };
    659  1.1  mrg   const vc repl_bs = {
    660  1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
    661  1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
    662  1.1  mrg   };
    663  1.1  mrg   const vc repl_qm = {
    664  1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
    665  1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
    666  1.1  mrg   };
    667  1.1  mrg   const vc ones = {
    668  1.1  mrg     -1, -1, -1, -1, -1, -1, -1, -1,
    669  1.1  mrg     -1, -1, -1, -1, -1, -1, -1, -1,
    670  1.1  mrg   };
    671  1.1  mrg   const vc zero = { 0 };
    672  1.1  mrg 
    673  1.1  mrg   vc data, mask, t;
    674  1.1  mrg 
    675  1.1  mrg   /* Altivec loads automatically mask addresses with -16.  This lets us
    676  1.1  mrg      issue the first load as early as possible.  */
    677  1.1  mrg   data = __builtin_vec_ld(0, (const vc *)s);
    678  1.1  mrg 
    679  1.1  mrg   /* Discard bytes before the beginning of the buffer.  Do this by
    680  1.1  mrg      beginning with all ones and shifting in zeros according to the
    681  1.1  mrg      mis-alignment.  The LVSR instruction pulls the exact shift we
    682  1.1  mrg      want from the address.  */
    683  1.1  mrg   mask = __builtin_vec_lvsr(0, s);
    684  1.1  mrg   mask = __builtin_vec_perm(zero, ones, mask);
    685  1.1  mrg   data &= mask;
    686  1.1  mrg 
    687  1.1  mrg   /* While altivec loads mask addresses, we still need to align S so
    688  1.1  mrg      that the offset we compute at the end is correct.  */
    689  1.1  mrg   s = (const uchar *)((uintptr_t)s & -16);
    690  1.1  mrg 
    691  1.1  mrg   /* Main loop processing 16 bytes at a time.  */
    692  1.1  mrg   goto start;
    693  1.1  mrg   do
    694  1.1  mrg     {
    695  1.1  mrg       vc m_nl, m_cr, m_bs, m_qm;
    696  1.1  mrg 
    697  1.1  mrg       s += 16;
    698  1.1  mrg       data = __builtin_vec_ld(0, (const vc *)s);
    699  1.1  mrg 
    700  1.1  mrg     start:
    701  1.1  mrg       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
    702  1.1  mrg       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
    703  1.1  mrg       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
    704  1.1  mrg       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
    705  1.1  mrg       t = (m_nl | m_cr) | (m_bs | m_qm);
    706  1.1  mrg 
    707  1.1  mrg       /* T now contains 0xff in bytes for which we matched one of the relevant
    708  1.1  mrg 	 characters.  We want to exit the loop if any byte in T is non-zero.
    709  1.1  mrg 	 Below is the expansion of vec_any_ne(t, zero).  */
    710  1.1  mrg     }
    711  1.1  mrg   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
    712  1.1  mrg 
    713  1.1  mrg   {
    714  1.1  mrg #define N  (sizeof(vc) / sizeof(long))
    715  1.1  mrg 
    716  1.1  mrg     union {
    717  1.1  mrg       vc v;
    718  1.1  mrg       /* Statically assert that N is 2 or 4.  */
    719  1.1  mrg       unsigned long l[(N == 2 || N == 4) ? N : -1];
    720  1.1  mrg     } u;
    721  1.1  mrg     unsigned long l, i = 0;
    722  1.1  mrg 
    723  1.1  mrg     u.v = t;
    724  1.1  mrg 
    725  1.1  mrg     /* Find the first word of T that is non-zero.  */
    726  1.1  mrg     switch (N)
    727  1.1  mrg       {
    728  1.1  mrg       case 4:
    729  1.1  mrg 	l = u.l[i++];
    730  1.1  mrg 	if (l != 0)
    731  1.1  mrg 	  break;
    732  1.1  mrg 	s += sizeof(unsigned long);
    733  1.1  mrg 	l = u.l[i++];
    734  1.1  mrg 	if (l != 0)
    735  1.1  mrg 	  break;
    736  1.1  mrg 	s += sizeof(unsigned long);
    737  1.1  mrg 	/* FALLTHROUGH */
    738  1.1  mrg       case 2:
    739  1.1  mrg 	l = u.l[i++];
    740  1.1  mrg 	if (l != 0)
    741  1.1  mrg 	  break;
    742  1.1  mrg 	s += sizeof(unsigned long);
    743  1.1  mrg 	l = u.l[i];
    744  1.1  mrg       }
    745  1.1  mrg 
    746  1.1  mrg     /* L now contains 0xff in bytes for which we matched one of the
    747  1.1  mrg        relevant characters.  We can find the byte index by finding
    748  1.1  mrg        its bit index and dividing by 8.  */
    749  1.1  mrg     l = __builtin_clzl(l) >> 3;
    750  1.1  mrg     return s + l;
    751  1.1  mrg 
    752  1.1  mrg #undef N
    753  1.1  mrg   }
    754  1.1  mrg }
    755  1.1  mrg 
    756  1.1  mrg #elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
    757  1.1  mrg #include "arm_neon.h"
    758  1.1  mrg 
     759  1.1  mrg /* This doesn't have to be the exact page size, but no system may use
     760  1.1  mrg    a size smaller than this.  ARMv8 requires a minimum page size of
     761  1.1  mrg    4k.  The impact of being conservative here is a small number of
     762  1.1  mrg    cases will take the slightly slower entry path into the main
     763  1.1  mrg    loop.  */
     764  1.1  mrg 
     765  1.1  mrg #define AARCH64_MIN_PAGE_SIZE 4096
     766  1.1  mrg 
                   /* Return a pointer to the first '\n', '\r', '\\' or '?' at or after S,
                      comparing 16 bytes at a time with NEON.  END is unused; the main
                      loop has no bounds check and runs until a match is found.  */
     767  1.1  mrg static const uchar *
     768  1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
     769  1.1  mrg {
     770  1.1  mrg   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
     771  1.1  mrg   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
     772  1.1  mrg   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
     773  1.1  mrg   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
                     /* Gives each byte within an 8-byte half its own bit, so that the
                        0xff/0x00 compare results can be summed into a bitmap.  */
     774  1.1  mrg   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
     775  1.1  mrg 
                     /* Per-lane shift that moves one half's bits up by 8 so the two
                        halves do not collide when the lanes are added together.  */
     776  1.1  mrg #ifdef __ARM_BIG_ENDIAN
     777  1.1  mrg   const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
     778  1.1  mrg #else
     779  1.1  mrg   const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
     780  1.1  mrg #endif
     781  1.1  mrg 
     782  1.1  mrg   unsigned int found;
     783  1.1  mrg   const uint8_t *p;
     784  1.1  mrg   uint8x16_t data;
     785  1.1  mrg   uint8x16_t t;
     786  1.1  mrg   uint16x8_t m;
     787  1.1  mrg   uint8x16_t u, v, w;
     788  1.1  mrg 
     789  1.1  mrg   /* Align the source pointer.  */
     790  1.1  mrg   p = (const uint8_t *)((uintptr_t)s & -16);
     791  1.1  mrg 
     792  1.1  mrg   /* Assuming random string start positions, with a 4k page size we'll take
     793  1.1  mrg      the slow path about 0.37% of the time.  */
                     /* The slow path is taken when fewer than 16 bytes lie between S and
                        the next AARCH64_MIN_PAGE_SIZE boundary.  */
     794  1.1  mrg   if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
     795  1.1  mrg 			 - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
     796  1.1  mrg 			< 16, 0))
     797  1.1  mrg     {
     798  1.1  mrg       /* Slow path: the string starts near a possible page boundary.  */
     799  1.1  mrg       uint32_t misalign, mask;
     800  1.1  mrg 
     801  1.1  mrg       misalign = (uintptr_t)s & 15;
                         /* Bitmap of the byte positions at or after S within the aligned
                            16-byte block starting at P.  */
     802  1.1  mrg       mask = (-1u << misalign) & 0xffff;
     803  1.1  mrg       data = vld1q_u8 (p);
     804  1.1  mrg       t = vceqq_u8 (data, repl_nl);
     805  1.1  mrg       u = vceqq_u8 (data, repl_cr);
     806  1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
     807  1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
     808  1.1  mrg       t = vorrq_u8 (v, w);
     809  1.1  mrg       t = vandq_u8 (t, xmask);
     810  1.1  mrg       m = vpaddlq_u8 (t);
     811  1.1  mrg       m = vshlq_u16 (m, shift);
     812  1.1  mrg       found = vaddvq_u16 (m);
     813  1.1  mrg       found &= mask;
     814  1.1  mrg       if (found)
     815  1.1  mrg 	return (const uchar*)p + __builtin_ctz (found);
     816  1.1  mrg     }
     817  1.1  mrg   else
     818  1.1  mrg     {
                         /* Fast path: load 16 bytes directly from S, unaligned.  */
     819  1.1  mrg       data = vld1q_u8 ((const uint8_t *) s);
     820  1.1  mrg       t = vceqq_u8 (data, repl_nl);
     821  1.1  mrg       u = vceqq_u8 (data, repl_cr);
     822  1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
     823  1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
     824  1.1  mrg       t = vorrq_u8 (v, w);
     825  1.1  mrg       if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
     826  1.1  mrg 	goto done;
     827  1.1  mrg     }
     828  1.1  mrg 
                     /* No match in the first block.  Continue with aligned loads from
                        P + 16; after the unaligned fast-path load this may re-examine
                        some bytes but never skips any.  */
     829  1.1  mrg   do
     830  1.1  mrg     {
     831  1.1  mrg       p += 16;
     832  1.1  mrg       data = vld1q_u8 (p);
     833  1.1  mrg       t = vceqq_u8 (data, repl_nl);
     834  1.1  mrg       u = vceqq_u8 (data, repl_cr);
     835  1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
     836  1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
     837  1.1  mrg       t = vorrq_u8 (v, w);
     838  1.1  mrg     } while (!vpaddd_u64 ((uint64x2_t)t));
     839  1.1  mrg 
     840  1.1  mrg done:
     841  1.1  mrg   /* Now that we've found the terminating substring, work out precisely where
     842  1.1  mrg      we need to stop.  */
     843  1.1  mrg   t = vandq_u8 (t, xmask);
     844  1.1  mrg   m = vpaddlq_u8 (t);
     845  1.1  mrg   m = vshlq_u16 (m, shift);
     846  1.1  mrg   found = vaddvq_u16 (m);
                     /* When we arrive here straight from the fast path, T describes the
                        block loaded from S while P is still the rounded-down address, so
                        the offset must be applied to S in that case.  */
     847  1.1  mrg   return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
     848  1.1  mrg 	  + __builtin_ctz (found));
     849  1.1  mrg }
    850  1.1  mrg 
    851  1.1  mrg #elif defined (__ARM_NEON)
    852  1.1  mrg #include "arm_neon.h"
    853  1.1  mrg 
     854  1.1  mrg static const uchar *
     855  1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
     856  1.1  mrg {
                     /* Return a pointer to the first '\n', '\r', '\\' or '?' at or after
                        S, testing 16 bytes per iteration.  END is unused; the loop ends
                        only when a match is found.  */
     857  1.1  mrg   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
     858  1.1  mrg   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
     859  1.1  mrg   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
     860  1.1  mrg   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
                     /* Gives each byte within an 8-byte half its own bit.  */
     861  1.1  mrg   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
     862  1.1  mrg 
     863  1.1  mrg   unsigned int misalign, found, mask;
     864  1.1  mrg   const uint8_t *p;
     865  1.1  mrg   uint8x16_t data;
     866  1.1  mrg 
     867  1.1  mrg   /* Align the source pointer.  */
     868  1.1  mrg   misalign = (uintptr_t)s & 15;
     869  1.1  mrg   p = (const uint8_t *)((uintptr_t)s & -16);
     870  1.1  mrg   data = vld1q_u8 (p);
     871  1.1  mrg 
     872  1.1  mrg   /* Create a mask for the bytes that are valid within the first
     873  1.1  mrg      16-byte block.  The idea here is that the AND with the mask
     874  1.1  mrg      within the loop is "free", since we need some AND or TEST
     875  1.1  mrg      insn in order to set the flags for the branch anyway.  */
     876  1.1  mrg   mask = (-1u << misalign) & 0xffff;
     877  1.1  mrg 
     878  1.1  mrg   /* Main loop, processing 16 bytes at a time.  */
                     /* The first iteration is entered at START so the block already
                        loaded above is tested with the partial MASK.  */
     879  1.1  mrg   goto start;
     880  1.1  mrg 
     881  1.1  mrg   do
     882  1.1  mrg     {
     883  1.1  mrg       uint8x8_t l;
     884  1.1  mrg       uint16x4_t m;
     885  1.1  mrg       uint32x2_t n;
     886  1.1  mrg       uint8x16_t t, u, v, w;
     887  1.1  mrg 
     888  1.1  mrg       p += 16;
     889  1.1  mrg       data = vld1q_u8 (p);
                         /* Every byte of the second and later blocks is valid.  */
     890  1.1  mrg       mask = 0xffff;
     891  1.1  mrg 
     892  1.1  mrg     start:
     893  1.1  mrg       t = vceqq_u8 (data, repl_nl);
     894  1.1  mrg       u = vceqq_u8 (data, repl_cr);
     895  1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
     896  1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
     897  1.1  mrg       t = vandq_u8 (vorrq_u8 (v, w), xmask);
                         /* Three rounds of pairwise adds collapse each 8-byte half of T
                            into an 8-bit bitmap held in one 32-bit lane of N.  */
     898  1.1  mrg       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
     899  1.1  mrg       m = vpaddl_u8 (l);
     900  1.1  mrg       n = vpaddl_u16 (m);
     901  1.1  mrg 
                         /* Viewed as one 64-bit value, the high half's bitmap sits 32
                            bits up; shifting right by 24 drops it onto bits 8-15, and
                            the OR yields a 16-bit bitmap in lane 0.  */
     902  1.1  mrg       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
     903  1.1  mrg 	      vshr_n_u64 ((uint64x1_t) n, 24)), 0);
     904  1.1  mrg       found &= mask;
     905  1.1  mrg     }
     906  1.1  mrg   while (!found);
     907  1.1  mrg 
     908  1.1  mrg   /* FOUND contains 1 in bits for which we matched a relevant
     909  1.1  mrg      character.  Conversion to the byte index is trivial.  */
     910  1.1  mrg   found = __builtin_ctz (found);
     911  1.1  mrg   return (const uchar *)p + found;
     912  1.1  mrg }
    913  1.1  mrg 
    914  1.1  mrg #else
    915  1.1  mrg 
     916  1.1  mrg /* We only have one accelerated alternative.  Use a direct call so that
     917  1.1  mrg    we encourage inlining.  */
     918  1.1  mrg 
                   /* In this fallback branch of the preprocessor chain, search_line_fast
                      is simply another name for search_line_acc_char.  */
     919  1.1  mrg #define search_line_fast  search_line_acc_char
    920  1.1  mrg 
    921  1.1  mrg #endif
    922  1.1  mrg 
     923  1.1  mrg /* Initialize the lexer if needed.  Calls init_vectorized_lexer when
                      HAVE_init_vectorized_lexer is defined; otherwise this function
                      does nothing.  */
     924  1.1  mrg 
     925  1.1  mrg void
     926  1.1  mrg _cpp_init_lexer (void)
     927  1.1  mrg {
     928  1.1  mrg #ifdef HAVE_init_vectorized_lexer
     929  1.1  mrg   init_vectorized_lexer ();
     930  1.1  mrg #endif
     931  1.1  mrg }
    932  1.1  mrg 
     933  1.1  mrg /* Returns with a logical line that contains no escaped newlines or
     934  1.1  mrg    trigraphs.  This is a time-critical inner loop.
                   
                      The line starts at PFILE->buffer->next_line.  Escaped newlines are
                      spliced out in place; trigraphs are replaced in place only when the
                      trigraphs option is set, but a line note is recorded for each one
                      either way.  On return the cleaned line is terminated by '\n', a
                      sentinel note has been added, and next_line points just past the
                      last source byte consumed.  */
     935  1.1  mrg void
     936  1.1  mrg _cpp_clean_line (cpp_reader *pfile)
     937  1.1  mrg {
     938  1.1  mrg   cpp_buffer *buffer;
     939  1.1  mrg   const uchar *s;
     940  1.1  mrg   uchar c, *d, *p;
     941  1.1  mrg 
                     /* Reset the per-line state: no notes yet, and the current position
                        and line base both sit at the start of the new line.  */
     942  1.1  mrg   buffer = pfile->buffer;
     943  1.1  mrg   buffer->cur_note = buffer->notes_used = 0;
     944  1.1  mrg   buffer->cur = buffer->line_base = buffer->next_line;
     945  1.1  mrg   buffer->need_line = false;
     946  1.1  mrg   s = buffer->next_line;
     947  1.1  mrg 
     948  1.1  mrg   if (!buffer->from_stage3)
     949  1.1  mrg     {
                         /* Most recent backslash seen on this line, or NULL if none.  */
     950  1.1  mrg       const uchar *pbackslash = NULL;
     951  1.1  mrg 
     952  1.1  mrg       /* Fast path.  This is the common case of an un-escaped line with
     953  1.1  mrg 	 no trigraphs.  The primary win here is by not writing any
     954  1.1  mrg 	 data back to memory until we have to.  */
     955  1.1  mrg       while (1)
     956  1.1  mrg 	{
     957  1.1  mrg 	  /* Perform an optimized search for \n, \r, \\, ?.  */
     958  1.1  mrg 	  s = search_line_fast (s, buffer->rlimit);
     959  1.1  mrg 
     960  1.1  mrg 	  c = *s;
     961  1.1  mrg 	  if (c == '\\')
     962  1.1  mrg 	    {
     963  1.1  mrg 	      /* Record the location of the backslash and continue.  */
     964  1.1  mrg 	      pbackslash = s++;
     965  1.1  mrg 	    }
     966  1.1  mrg 	  else if (__builtin_expect (c == '?', 0))
     967  1.1  mrg 	    {
                   	      /* NOTE(review): s[1] and s[2] are read without a bounds
                   		 check; presumably the buffer is padded past rlimit --
                   		 confirm against the buffer setup code.  */
     968  1.1  mrg 	      if (__builtin_expect (s[1] == '?', false)
     969  1.1  mrg 		   && _cpp_trigraph_map[s[2]])
     970  1.1  mrg 		{
     971  1.1  mrg 		  /* Have a trigraph.  We may or may not have to convert
     972  1.1  mrg 		     it.  Add a line note regardless, for -Wtrigraphs.  */
     973  1.1  mrg 		  add_line_note (buffer, s, s[2]);
     974  1.1  mrg 		  if (CPP_OPTION (pfile, trigraphs))
     975  1.1  mrg 		    {
     976  1.1  mrg 		      /* We do, and that means we have to switch to the
     977  1.1  mrg 		         slow path.  */
                   		      /* Overwrite the first '?' with the replacement and
                   			 leave S on the third byte; the slow path's
                   			 pre-increments then resume right after it.  */
     978  1.1  mrg 		      d = (uchar *) s;
     979  1.1  mrg 		      *d = _cpp_trigraph_map[s[2]];
     980  1.1  mrg 		      s += 2;
     981  1.1  mrg 		      goto slow_path;
     982  1.1  mrg 		    }
     983  1.1  mrg 		}
     984  1.1  mrg 	      /* Not a trigraph.  Continue on fast-path.  */
     985  1.1  mrg 	      s++;
     986  1.1  mrg 	    }
     987  1.1  mrg 	  else
     988  1.1  mrg 	    break;
     989  1.1  mrg 	}
     990  1.1  mrg 
     991  1.1  mrg       /* This must be \r or \n.  We're either done, or we'll be forced
     992  1.1  mrg 	 to write back to the buffer and continue on the slow path.  */
     993  1.1  mrg       d = (uchar *) s;
     994  1.1  mrg 
     995  1.1  mrg       if (__builtin_expect (s == buffer->rlimit, false))
     996  1.1  mrg 	goto done;
     997  1.1  mrg 
     998  1.1  mrg       /* DOS line ending? */
     999  1.1  mrg       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
    1000  1.1  mrg 	{
    1001  1.1  mrg 	  s++;
    1002  1.1  mrg 	  if (s == buffer->rlimit)
    1003  1.1  mrg 	    goto done;
    1004  1.1  mrg 	}
    1005  1.1  mrg 
                         /* No backslash anywhere on the line, so the newline cannot be
                            escaped.  */
    1006  1.1  mrg       if (__builtin_expect (pbackslash == NULL, true))
    1007  1.1  mrg 	goto done;
    1008  1.1  mrg 
    1009  1.1  mrg       /* Check for escaped newline.  */
                         /* Walk back over horizontal whitespace before the newline.
                            pbackslash is non-NULL here, so a backslash lies before D and
                            the scan is expected to stop there at the latest (assuming
                            is_nvspace rejects '\\').  */
    1010  1.1  mrg       p = d;
    1011  1.1  mrg       while (is_nvspace (p[-1]))
    1012  1.1  mrg 	p--;
    1013  1.1  mrg       if (p - 1 != pbackslash)
    1014  1.1  mrg 	goto done;
    1015  1.1  mrg 
    1016  1.1  mrg       /* Have an escaped newline; process it and proceed to
    1017  1.1  mrg 	 the slow path.  */
                         /* The note type is ' ' when whitespace separated the backslash
                            from the newline and '\\' otherwise.  D is set so that the
                            next byte copied overwrites the backslash, and next_line is
                            reused as the lower bound for later backward scans; it is
                            reassigned at DONE.  */
    1018  1.1  mrg       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
    1019  1.1  mrg       d = p - 2;
    1020  1.1  mrg       buffer->next_line = p - 1;
    1021  1.1  mrg 
    1022  1.1  mrg     slow_path:
                         /* Copy loop: S is the read pointer, D the write pointer that
                            trails it, both advanced before each byte is moved.  */
    1023  1.1  mrg       while (1)
    1024  1.1  mrg 	{
    1025  1.1  mrg 	  c = *++s;
    1026  1.1  mrg 	  *++d = c;
    1027  1.1  mrg 
    1028  1.1  mrg 	  if (c == '\n' || c == '\r')
    1029  1.1  mrg 	    {
    1030  1.1  mrg 	      /* Handle DOS line endings.  */
    1031  1.1  mrg 	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
    1032  1.1  mrg 		s++;
    1033  1.1  mrg 	      if (s == buffer->rlimit)
    1034  1.1  mrg 		break;
    1035  1.1  mrg 
    1036  1.1  mrg 	      /* Escaped?  */
    1037  1.1  mrg 	      p = d;
    1038  1.1  mrg 	      while (p != buffer->next_line && is_nvspace (p[-1]))
    1039  1.1  mrg 		p--;
    1040  1.1  mrg 	      if (p == buffer->next_line || p[-1] != '\\')
    1041  1.1  mrg 		break;
    1042  1.1  mrg 
    1043  1.1  mrg 	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
    1044  1.1  mrg 	      d = p - 2;
    1045  1.1  mrg 	      buffer->next_line = p - 1;
    1046  1.1  mrg 	    }
    1047  1.1  mrg 	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
    1048  1.1  mrg 	    {
    1049  1.1  mrg 	      /* Add a note regardless, for the benefit of -Wtrigraphs.  */
    1050  1.1  mrg 	      add_line_note (buffer, d, s[2]);
    1051  1.1  mrg 	      if (CPP_OPTION (pfile, trigraphs))
    1052  1.1  mrg 		{
    1053  1.1  mrg 		  *d = _cpp_trigraph_map[s[2]];
    1054  1.1  mrg 		  s += 2;
    1055  1.1  mrg 		}
    1056  1.1  mrg 	    }
    1057  1.1  mrg 	}
    1058  1.1  mrg     }
    1059  1.1  mrg   else
    1060  1.1  mrg     {
                         /* from_stage3 buffers get no trigraph or escaped-newline
                            processing here: just find the end of the line.  */
    1061  1.1  mrg       while (*s != '\n' && *s != '\r')
    1062  1.1  mrg 	s++;
    1063  1.1  mrg       d = (uchar *) s;
    1064  1.1  mrg 
    1065  1.1  mrg       /* Handle DOS line endings.  */
    1066  1.1  mrg       if (*s == '\r' && s + 1 != buffer->rlimit && s[1] == '\n')
    1067  1.1  mrg 	s++;
    1068  1.1  mrg     }
    1069  1.1  mrg 
    1070  1.1  mrg  done:
                     /* D is the end of the cleaned line and S the last source byte
                        consumed; the two differ once anything has been spliced.  */
    1071  1.1  mrg   *d = '\n';
    1072  1.1  mrg   /* A sentinel note that should never be processed.  */
    1073  1.1  mrg   add_line_note (buffer, d + 1, '\n');
    1074  1.1  mrg   buffer->next_line = s + 1;
    1075  1.1  mrg }
   1076  1.1  mrg 
    1077  1.1  mrg /* Return true if the trigraph indicated by NOTE should be warned
    1078  1.1  mrg    about in a comment.  NOTE[1], the following note, is also examined.  */
    1079  1.1  mrg static bool
    1080  1.1  mrg warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
    1081  1.1  mrg {
    1082  1.1  mrg   const uchar *p;
    1083  1.1  mrg 
    1084  1.1  mrg   /* Within comments we don't warn about trigraphs, unless the
    1085  1.1  mrg      trigraph forms an escaped newline, as that may change
    1086  1.1  mrg      behavior.  */
                     /* A note of type '/' stands for the trigraph ??/, which is the
                        trigraph for a backslash.  */
    1087  1.1  mrg   if (note->type != '/')
    1088  1.1  mrg     return false;
    1089  1.1  mrg 
    1090  1.1  mrg   /* If -trigraphs, then this was an escaped newline iff the next note
    1091  1.1  mrg      is coincident.  */
    1092  1.1  mrg   if (CPP_OPTION (pfile, trigraphs))
    1093  1.1  mrg     return note[1].pos == note->pos;
    1094  1.1  mrg 
    1095  1.1  mrg   /* Otherwise, see if this forms an escaped newline.  */
                     /* Skip the three bytes of the trigraph, then any horizontal
                        whitespace that follows it.  */
    1096  1.1  mrg   p = note->pos + 3;
    1097  1.1  mrg   while (is_nvspace (*p))
    1098  1.1  mrg     p++;
    1099  1.1  mrg 
    1100  1.1  mrg   /* There might have been escaped newlines between the trigraph and the
    1101  1.1  mrg      newline we found.  Hence the position test.  */
    1102  1.1  mrg   return (*p == '\n' && p < note[1].pos);
    1103  1.1  mrg }
   1104  1.1  mrg 
    1105  1.1  mrg /* Process the notes created by add_line_note as far as the current
    1106  1.1  mrg    location.  When IN_COMMENT is nonzero, the "separated by space"
                      warning is suppressed and a trigraph is only warned about if
                      warn_in_comment accepts it.  */
    1107  1.1  mrg void
    1108  1.1  mrg _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
    1109  1.1  mrg {
    1110  1.1  mrg   cpp_buffer *buffer = pfile->buffer;
    1111  1.1  mrg 
    1112  1.1  mrg   for (;;)
    1113  1.1  mrg     {
    1114  1.1  mrg       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
    1115  1.1  mrg       unsigned int col;
    1116  1.1  mrg 
                         /* Stop at the first note that lies beyond the current position;
                            it is left for a later call.  */
    1117  1.1  mrg       if (note->pos > buffer->cur)
    1118  1.1  mrg 	break;
    1119  1.1  mrg 
    1120  1.1  mrg       buffer->cur_note++;
    1121  1.1  mrg       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
    1122  1.1  mrg 
                         /* Types '\\' and ' ' mark an escaped newline; ' ' means that
                            whitespace separated the backslash from the newline.  */
    1123  1.1  mrg       if (note->type == '\\' || note->type == ' ')
    1124  1.1  mrg 	{
    1125  1.1  mrg 	  if (note->type == ' ' && !in_comment)
    1126  1.1  mrg 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
    1127  1.1  mrg 				 "backslash and newline separated by space");
    1128  1.1  mrg 
    1129  1.1  mrg 	  if (buffer->next_line > buffer->rlimit)
    1130  1.1  mrg 	    {
    1131  1.1  mrg 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
    1132  1.1  mrg 				   "backslash-newline at end of file");
    1133  1.1  mrg 	      /* Prevent "no newline at end of file" warning.  */
    1134  1.1  mrg 	      buffer->next_line = buffer->rlimit;
    1135  1.1  mrg 	    }
    1136  1.1  mrg 
                   	  /* A new physical line starts at the splice point: move the
                   	     line base there and bump the line number.  */
    1137  1.1  mrg 	  buffer->line_base = note->pos;
    1138  1.1  mrg 	  CPP_INCREMENT_LINE (pfile, 0);
    1139  1.1  mrg 	}
                         /* Otherwise the note type is the third character of a
                            trigraph.  */
    1140  1.1  mrg       else if (_cpp_trigraph_map[note->type])
    1141  1.1  mrg 	{
    1142  1.1  mrg 	  if (CPP_OPTION (pfile, warn_trigraphs)
    1143  1.1  mrg 	      && (!in_comment || warn_in_comment (pfile, note)))
    1144  1.1  mrg 	    {
    1145  1.1  mrg 	      if (CPP_OPTION (pfile, trigraphs))
    1146  1.1  mrg 		cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
    1147  1.1  mrg                                        pfile->line_table->highest_line, col,
    1148  1.1  mrg 				       "trigraph ??%c converted to %c",
    1149  1.1  mrg 				       note->type,
    1150  1.1  mrg 				       (int) _cpp_trigraph_map[note->type]);
    1151  1.1  mrg 	      else
    1152  1.1  mrg 		{
    1153  1.1  mrg 		  cpp_warning_with_line
    1154  1.1  mrg 		    (pfile, CPP_W_TRIGRAPHS,
    1155  1.1  mrg                      pfile->line_table->highest_line, col,
    1156  1.1  mrg 		     "trigraph ??%c ignored, use -trigraphs to enable",
    1157  1.1  mrg 		     note->type);
    1158  1.1  mrg 		}
    1159  1.1  mrg 	    }
    1160  1.1  mrg 	}
    1161  1.1  mrg       else if (note->type == 0)
    1162  1.1  mrg 	/* Already processed in lex_raw_string.  */;
    1163  1.1  mrg       else
                   	/* Any other note type is unexpected here.  */
    1164  1.1  mrg 	abort ();
    1165  1.1  mrg     }
    1166  1.1  mrg }
   1167  1.1  mrg 
    1168  1.1  mrg namespace bidi {
                     /* Tracking of Unicode bidirectional control characters: the kinds
                        recognized, the stack of currently open contexts, and helpers
                        to update and query that stack.  */
    1169  1.1  mrg   enum class kind {
    1170  1.1  mrg     NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
    1171  1.1  mrg   };
    1172  1.1  mrg 
    1173  1.1  mrg   /* All the UTF-8 encodings of bidi characters start with E2.  */
    1174  1.1  mrg   constexpr uchar utf8_start = 0xe2;
    1175  1.1  mrg 
                     /* One open bidi context: where it was opened, by which kind of
                        character, and how it is closed.  */
    1176  1.1  mrg   struct context
    1177  1.1  mrg   {
                       /* This constructor performs no explicit member initialization.  */
    1178  1.1  mrg     context () {}
    1179  1.1  mrg     context (location_t loc, kind k, bool pdf, bool ucn)
    1180  1.1  mrg     : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
    1181  1.1  mrg     {
    1182  1.1  mrg     }
    1183  1.1  mrg 
                       /* The kind of character that closes this context: PDF when
                          m_pdf is set, PDI otherwise.  */
    1184  1.1  mrg     kind get_pop_kind () const
    1185  1.1  mrg     {
    1186  1.1  mrg       return m_pdf ? kind::PDF : kind::PDI;
    1187  1.1  mrg     }
    1188  1.1  mrg     bool ucn_p () const
    1189  1.1  mrg     {
    1190  1.1  mrg       return m_ucn;
    1191  1.1  mrg     }
    1192  1.1  mrg 
    1193  1.1  mrg     location_t m_loc;
    1194  1.1  mrg     kind m_kind;
    1195  1.1  mrg     unsigned m_pdf : 1;
    1196  1.1  mrg     unsigned m_ucn : 1;
    1197  1.1  mrg   };
    1198  1.1  mrg 
    1199  1.1  mrg   /* A vector holding currently open bidi contexts.  Each element is a
    1200  1.1  mrg      context: m_pdf is 1 if it is closed by PDF and 0 if it is closed
    1201  1.1  mrg      by PDI, and m_ucn is 1 if the context was opened by a bidi
    1202  1.1  mrg      character written as a UCN, and 0 when it was UTF-8.  */
    1203  1.1  mrg   semi_embedded_vec <context, 16> vec;
    1204  1.1  mrg 
    1205  1.1  mrg   /* Close the whole comment/identifier/string literal/character constant
    1206  1.1  mrg      context.  */
    1207  1.1  mrg   void on_close ()
    1208  1.1  mrg   {
    1209  1.1  mrg     vec.truncate (0);
    1210  1.1  mrg   }
    1211  1.1  mrg 
    1212  1.1  mrg   /* Pop the last element in the vector.  */
    1213  1.1  mrg   void pop ()
    1214  1.1  mrg   {
    1215  1.1  mrg     unsigned int len = vec.count ();
    1216  1.1  mrg     gcc_checking_assert (len > 0);
    1217  1.1  mrg     vec.truncate (len - 1);
    1218  1.1  mrg   }
    1219  1.1  mrg 
    1220  1.1  mrg   /* Return the pop kind of the context of the Ith element.  */
    1221  1.1  mrg   kind pop_kind_at (unsigned int i)
    1222  1.1  mrg   {
    1223  1.1  mrg     return vec[i].get_pop_kind ();
    1224  1.1  mrg   }
    1225  1.1  mrg 
    1226  1.1  mrg   /* Return the pop kind of the context that is currently opened.  */
                     /* Returns kind::NONE when no context is open.  */
    1227  1.1  mrg   kind current_ctx ()
    1228  1.1  mrg   {
    1229  1.1  mrg     unsigned int len = vec.count ();
    1230  1.1  mrg     if (len == 0)
    1231  1.1  mrg       return kind::NONE;
    1232  1.1  mrg     return vec[len - 1].get_pop_kind ();
    1233  1.1  mrg   }
    1234  1.1  mrg 
    1235  1.1  mrg   /* Return true if the current context comes from a UCN origin, that is,
    1236  1.1  mrg      the bidi char which started this bidi context was written as a UCN.  */
    1237  1.1  mrg   bool current_ctx_ucn_p ()
    1238  1.1  mrg   {
    1239  1.1  mrg     unsigned int len = vec.count ();
    1240  1.1  mrg     gcc_checking_assert (len > 0);
    1241  1.1  mrg     return vec[len - 1].m_ucn;
    1242  1.1  mrg   }
    1243  1.1  mrg 
                     /* Return the location stored for the innermost open context.  At
                        least one context must be open.  */
    1244  1.1  mrg   location_t current_ctx_loc ()
    1245  1.1  mrg   {
    1246  1.1  mrg     unsigned int len = vec.count ();
    1247  1.1  mrg     gcc_checking_assert (len > 0);
    1248  1.1  mrg     return vec[len - 1].m_loc;
    1249  1.1  mrg   }
    1250  1.1  mrg 
    1251  1.1  mrg   /* We've read a bidi char, update the current vector as necessary.
    1252  1.1  mrg      LOC is only valid when K is not kind::NONE.  */
    1253  1.1  mrg   void on_char (kind k, bool ucn_p, location_t loc)
    1254  1.1  mrg   {
    1255  1.1  mrg     switch (k)
    1256  1.1  mrg       {
                         /* Embeddings and overrides open a context closed by PDF.  */
    1257  1.1  mrg       case kind::LRE:
    1258  1.1  mrg       case kind::RLE:
    1259  1.1  mrg       case kind::LRO:
    1260  1.1  mrg       case kind::RLO:
    1261  1.1  mrg 	vec.push (context (loc, k, true, ucn_p));
    1262  1.1  mrg 	break;
                         /* Isolates open a context closed by PDI.  */
    1263  1.1  mrg       case kind::LRI:
    1264  1.1  mrg       case kind::RLI:
    1265  1.1  mrg       case kind::FSI:
    1266  1.1  mrg 	vec.push (context (loc, k, false, ucn_p));
    1267  1.1  mrg 	break;
    1268  1.1  mrg       /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
    1269  1.1  mrg 	 whose scope has not yet been terminated.  */
    1270  1.1  mrg       case kind::PDF:
    1271  1.1  mrg 	if (current_ctx () == kind::PDF)
    1272  1.1  mrg 	  pop ();
    1273  1.1  mrg 	break;
    1274  1.1  mrg       /* PDI terminates the scope of the last LRI, RLI, or FSI whose
    1275  1.1  mrg 	 scope has not yet been terminated, as well as the scopes of
    1276  1.1  mrg 	 any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
    1277  1.1  mrg 	 yet been terminated.  */
    1278  1.1  mrg       case kind::PDI:
                   	/* Search from the innermost context outwards; I is signed so
                   	   the loop can run off the bottom when no isolate is open.
                   	   Truncating to I drops the isolate and everything above.  */
    1279  1.1  mrg 	for (int i = vec.count () - 1; i >= 0; --i)
    1280  1.1  mrg 	  if (pop_kind_at (i) == kind::PDI)
    1281  1.1  mrg 	    {
    1282  1.1  mrg 	      vec.truncate (i);
    1283  1.1  mrg 	      break;
    1284  1.1  mrg 	    }
    1285  1.1  mrg 	break;
    1286  1.1  mrg       case kind::LTR:
    1287  1.1  mrg       case kind::RTL:
    1288  1.1  mrg 	/* These aren't popped by a PDF/PDI.  */
    1289  1.1  mrg 	break;
    1290  1.1  mrg       ATTR_LIKELY case kind::NONE:
    1291  1.1  mrg 	break;
    1292  1.1  mrg       default:
    1293  1.1  mrg 	abort ();
    1294  1.1  mrg       }
    1295  1.1  mrg   }
    1296  1.1  mrg 
    1297  1.1  mrg   /* Return a descriptive string for K.  */
                     /* K must not be kind::NONE: that value reaches the default case,
                        which aborts.  */
    1298  1.1  mrg   const char *to_str (kind k)
    1299  1.1  mrg   {
    1300  1.1  mrg     switch (k)
    1301  1.1  mrg       {
    1302  1.1  mrg       case kind::LRE:
    1303  1.1  mrg 	return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
    1304  1.1  mrg       case kind::RLE:
    1305  1.1  mrg 	return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
    1306  1.1  mrg       case kind::LRO:
    1307  1.1  mrg 	return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
    1308  1.1  mrg       case kind::RLO:
    1309  1.1  mrg 	return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
    1310  1.1  mrg       case kind::LRI:
    1311  1.1  mrg 	return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
    1312  1.1  mrg       case kind::RLI:
    1313  1.1  mrg 	return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
    1314  1.1  mrg       case kind::FSI:
    1315  1.1  mrg 	return "U+2068 (FIRST STRONG ISOLATE)";
    1316  1.1  mrg       case kind::PDF:
    1317  1.1  mrg 	return "U+202C (POP DIRECTIONAL FORMATTING)";
    1318  1.1  mrg       case kind::PDI:
    1319  1.1  mrg 	return "U+2069 (POP DIRECTIONAL ISOLATE)";
    1320  1.1  mrg       case kind::LTR:
    1321  1.1  mrg 	return "U+200E (LEFT-TO-RIGHT MARK)";
    1322  1.1  mrg       case kind::RTL:
    1323  1.1  mrg 	return "U+200F (RIGHT-TO-LEFT MARK)";
    1324  1.1  mrg       default:
    1325  1.1  mrg 	abort ();
    1326  1.1  mrg       }
    1327  1.1  mrg   }
    1328  1.1  mrg }
   1329  1.1  mrg 
    1330  1.1  mrg /* Get location_t for the range of bytes [START, START + NUM_BYTES)
    1331  1.1  mrg    within the current line in PFILE, with the caret at START.
                      NUM_BYTES must be greater than zero.  If the first and last bytes
                      map to the same location, that location is returned as is;
                      otherwise the result combines the caret location with the range
                      from the first byte to the last.  */
    1332  1.1  mrg 
    1333  1.1  mrg static location_t
    1334  1.1  mrg get_location_for_byte_range_in_cur_line (cpp_reader *pfile,
    1335  1.1  mrg 					 const unsigned char *const start,
    1336  1.1  mrg 					 size_t num_bytes)
    1337  1.1  mrg {
    1338  1.1  mrg   gcc_checking_assert (num_bytes > 0);
    1339  1.1  mrg 
    1340  1.1  mrg   /* CPP_BUF_COLUMN and linemap_position_for_column both refer
    1341  1.1  mrg      to offsets in bytes, but CPP_BUF_COLUMN is 0-based,
    1342  1.1  mrg      whereas linemap_position_for_column is 1-based.  */
    1343  1.1  mrg 
    1344  1.1  mrg   /* Get 0-based offsets within the line.  */
                     /* END_OFFSET is inclusive: it is the offset of the last byte.  */
    1345  1.1  mrg   size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start);
    1346  1.1  mrg   size_t end_offset = start_offset + num_bytes - 1;
    1347  1.1  mrg 
    1348  1.1  mrg   /* Now convert to location_t, where "columns" are 1-based byte offsets.  */
    1349  1.1  mrg   location_t start_loc = linemap_position_for_column (pfile->line_table,
    1350  1.1  mrg 						      start_offset + 1);
    1351  1.1  mrg   location_t end_loc = linemap_position_for_column (pfile->line_table,
    1352  1.1  mrg 						     end_offset + 1);
    1353  1.1  mrg 
                     /* Nothing to combine when both ends map to the same location.  */
    1354  1.1  mrg   if (start_loc == end_loc)
    1355  1.1  mrg     return start_loc;
    1356  1.1  mrg 
    1357  1.1  mrg   source_range src_range;
    1358  1.1  mrg   src_range.m_start = start_loc;
    1359  1.1  mrg   src_range.m_finish = end_loc;
    1360  1.1  mrg   location_t combined_loc = COMBINE_LOCATION_DATA (pfile->line_table,
    1361  1.1  mrg 						   start_loc,
    1362  1.1  mrg 						   src_range,
    1363  1.1  mrg 						   NULL);
    1364  1.1  mrg   return combined_loc;
    1365  1.1  mrg }
   1366  1.1  mrg 
    1367  1.1  mrg /* Parse a sequence of 3 bytes starting with P and return its bidi code.
                      P[0] must be bidi::utf8_start.  Returns bidi::kind::NONE when
                      P[1] and P[2] do not complete one of the recognized bidi control
                      characters.  */
    1368  1.1  mrg 
    1369  1.1  mrg static bidi::kind
    1370  1.1  mrg get_bidi_utf8_1 (const unsigned char *const p)
    1371  1.1  mrg {
    1372  1.1  mrg   gcc_checking_assert (p[0] == bidi::utf8_start);
    1373  1.1  mrg 
                     /* E2 80 xx encodes U+2000..U+203F.  */
    1374  1.1  mrg   if (p[1] == 0x80)
    1375  1.1  mrg     switch (p[2])
    1376  1.1  mrg       {
    1377  1.1  mrg       case 0xaa:
    1378  1.1  mrg 	return bidi::kind::LRE;
    1379  1.1  mrg       case 0xab:
    1380  1.1  mrg 	return bidi::kind::RLE;
    1381  1.1  mrg       case 0xac:
    1382  1.1  mrg 	return bidi::kind::PDF;
    1383  1.1  mrg       case 0xad:
    1384  1.1  mrg 	return bidi::kind::LRO;
    1385  1.1  mrg       case 0xae:
    1386  1.1  mrg 	return bidi::kind::RLO;
    1387  1.1  mrg       case 0x8e:
    1388  1.1  mrg 	return bidi::kind::LTR;
    1389  1.1  mrg       case 0x8f:
    1390  1.1  mrg 	return bidi::kind::RTL;
    1391  1.1  mrg       default:
    1392  1.1  mrg 	break;
    1393  1.1  mrg       }
                     /* E2 81 xx encodes U+2040..U+207F.  */
    1394  1.1  mrg   else if (p[1] == 0x81)
    1395  1.1  mrg     switch (p[2])
    1396  1.1  mrg       {
    1397  1.1  mrg       case 0xa6:
    1398  1.1  mrg 	return bidi::kind::LRI;
    1399  1.1  mrg       case 0xa7:
    1400  1.1  mrg 	return bidi::kind::RLI;
    1401  1.1  mrg       case 0xa8:
    1402  1.1  mrg 	return bidi::kind::FSI;
    1403  1.1  mrg       case 0xa9:
    1404  1.1  mrg 	return bidi::kind::PDI;
    1405  1.1  mrg       default:
    1406  1.1  mrg 	break;
    1407  1.1  mrg       }
    1408  1.1  mrg 
    1409  1.1  mrg   return bidi::kind::NONE;
    1410  1.1  mrg }
   1411  1.1  mrg 
   1412  1.1  mrg /* Parse a sequence of 3 bytes starting with P and return its bidi code.
   1413  1.1  mrg    If the kind is not NONE, write the location to *OUT.*/
   1414  1.1  mrg 
   1415  1.1  mrg static bidi::kind
   1416  1.1  mrg get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
   1417  1.1  mrg {
   1418  1.1  mrg   bidi::kind result = get_bidi_utf8_1 (p);
   1419  1.1  mrg   if (result != bidi::kind::NONE)
   1420  1.1  mrg     {
   1421  1.1  mrg       /* We have a sequence of 3 bytes starting at P.  */
   1422  1.1  mrg       *out = get_location_for_byte_range_in_cur_line (pfile, p, 3);
   1423  1.1  mrg     }
   1424  1.1  mrg   return result;
   1425  1.1  mrg }
   1426  1.1  mrg 
   1427  1.1  mrg /* Parse a UCN where P points just past \u or \U and return its bidi code.  */
   1428  1.1  mrg 
   1429  1.1  mrg static bidi::kind
   1430  1.1  mrg get_bidi_ucn_1 (const unsigned char *p, bool is_U)
   1431  1.1  mrg {
   1432  1.1  mrg   /* 6.4.3 Universal Character Names
   1433  1.1  mrg       \u hex-quad
   1434  1.1  mrg       \U hex-quad hex-quad
   1435  1.1  mrg      where \unnnn means \U0000nnnn.  */
   1436  1.1  mrg 
   1437  1.1  mrg   if (is_U)
   1438  1.1  mrg     {
   1439  1.1  mrg       if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
   1440  1.1  mrg 	return bidi::kind::NONE;
   1441  1.1  mrg       /* Skip 4B so we can treat \u and \U the same below.  */
   1442  1.1  mrg       p += 4;
   1443  1.1  mrg     }
   1444  1.1  mrg 
   1445  1.1  mrg   /* All code points we are looking for start with 20xx.  */
   1446  1.1  mrg   if (p[0] != '2' || p[1] != '0')
   1447  1.1  mrg     return bidi::kind::NONE;
   1448  1.1  mrg   else if (p[2] == '2')
   1449  1.1  mrg     switch (p[3])
   1450  1.1  mrg       {
   1451  1.1  mrg       case 'a':
   1452  1.1  mrg       case 'A':
   1453  1.1  mrg 	return bidi::kind::LRE;
   1454  1.1  mrg       case 'b':
   1455  1.1  mrg       case 'B':
   1456  1.1  mrg 	return bidi::kind::RLE;
   1457  1.1  mrg       case 'c':
   1458  1.1  mrg       case 'C':
   1459  1.1  mrg 	return bidi::kind::PDF;
   1460  1.1  mrg       case 'd':
   1461  1.1  mrg       case 'D':
   1462  1.1  mrg 	return bidi::kind::LRO;
   1463  1.1  mrg       case 'e':
   1464  1.1  mrg       case 'E':
   1465  1.1  mrg 	return bidi::kind::RLO;
   1466  1.1  mrg       default:
   1467  1.1  mrg 	break;
   1468  1.1  mrg       }
   1469  1.1  mrg   else if (p[2] == '6')
   1470  1.1  mrg     switch (p[3])
   1471  1.1  mrg       {
   1472  1.1  mrg       case '6':
   1473  1.1  mrg 	return bidi::kind::LRI;
   1474  1.1  mrg       case '7':
   1475  1.1  mrg 	return bidi::kind::RLI;
   1476  1.1  mrg       case '8':
   1477  1.1  mrg 	return bidi::kind::FSI;
   1478  1.1  mrg       case '9':
   1479  1.1  mrg 	return bidi::kind::PDI;
   1480  1.1  mrg       default:
   1481  1.1  mrg 	break;
   1482  1.1  mrg       }
   1483  1.1  mrg   else if (p[2] == '0')
   1484  1.1  mrg     switch (p[3])
   1485  1.1  mrg       {
   1486  1.1  mrg       case 'e':
   1487  1.1  mrg       case 'E':
   1488  1.1  mrg 	return bidi::kind::LTR;
   1489  1.1  mrg       case 'f':
   1490  1.1  mrg       case 'F':
   1491  1.1  mrg 	return bidi::kind::RTL;
   1492  1.1  mrg       default:
   1493  1.1  mrg 	break;
   1494  1.1  mrg       }
   1495  1.1  mrg 
   1496  1.1  mrg   return bidi::kind::NONE;
   1497  1.1  mrg }
   1498  1.1  mrg 
   1499  1.1  mrg /* Parse a UCN where P points just past \u or \U and return its bidi code.
   1500  1.1  mrg    If the kind is not NONE, write the location to *OUT.*/
   1501  1.1  mrg 
   1502  1.1  mrg static bidi::kind
   1503  1.1  mrg get_bidi_ucn (cpp_reader *pfile,  const unsigned char *p, bool is_U,
   1504  1.1  mrg 	      location_t *out)
   1505  1.1  mrg {
   1506  1.1  mrg   bidi::kind result = get_bidi_ucn_1 (p, is_U);
   1507  1.1  mrg   if (result != bidi::kind::NONE)
   1508  1.1  mrg     {
   1509  1.1  mrg       const unsigned char *start = p - 2;
   1510  1.1  mrg       size_t num_bytes = 2 + (is_U ? 8 : 4);
   1511  1.1  mrg       *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
   1512  1.1  mrg     }
   1513  1.1  mrg   return result;
   1514  1.1  mrg }
   1515  1.1  mrg 
   1516  1.1  mrg /* Subclass of rich_location for reporting on unpaired UTF-8
   1517  1.1  mrg    bidirectional control character(s).
   1518  1.1  mrg    Escape the source lines on output, and show all unclosed
   1519  1.1  mrg    bidi context, labelling everything.  */
   1520  1.1  mrg 
   1521  1.1  mrg class unpaired_bidi_rich_location : public rich_location
   1522  1.1  mrg {
   1523  1.1  mrg  public:
   1524  1.1  mrg   class custom_range_label : public range_label
   1525  1.1  mrg   {
   1526  1.1  mrg    public:
   1527  1.1  mrg      label_text get_text (unsigned range_idx) const FINAL OVERRIDE
   1528  1.1  mrg      {
   1529  1.1  mrg        /* range 0 is the primary location; each subsequent range i + 1
   1530  1.1  mrg 	  is for bidi::vec[i].  */
   1531  1.1  mrg        if (range_idx > 0)
   1532  1.1  mrg 	 {
   1533  1.1  mrg 	   const bidi::context &ctxt (bidi::vec[range_idx - 1]);
   1534  1.1  mrg 	   return label_text::borrow (bidi::to_str (ctxt.m_kind));
   1535  1.1  mrg 	 }
   1536  1.1  mrg        else
   1537  1.1  mrg 	 return label_text::borrow (_("end of bidirectional context"));
   1538  1.1  mrg      }
   1539  1.1  mrg   };
   1540  1.1  mrg 
   1541  1.1  mrg   unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc)
   1542  1.1  mrg   : rich_location (pfile->line_table, loc, &m_custom_label)
   1543  1.1  mrg   {
   1544  1.1  mrg     set_escape_on_output (true);
   1545  1.1  mrg     for (unsigned i = 0; i < bidi::vec.count (); i++)
   1546  1.1  mrg       add_range (bidi::vec[i].m_loc,
   1547  1.1  mrg 		 SHOW_RANGE_WITHOUT_CARET,
   1548  1.1  mrg 		 &m_custom_label);
   1549  1.1  mrg   }
   1550  1.1  mrg 
   1551  1.1  mrg  private:
   1552  1.1  mrg    custom_range_label m_custom_label;
   1553  1.1  mrg };
   1554  1.1  mrg 
   1555  1.1  mrg /* We're closing a bidi context, that is, we've encountered a newline,
   1556  1.1  mrg    are closing a C-style comment, or are at the end of a string literal,
   1557  1.1  mrg    character constant, or identifier.  Warn if this context was not
   1558  1.1  mrg    properly terminated by a PDI or PDF.  P points to the last character
   1559  1.1  mrg    in this context.  */
   1560  1.1  mrg 
   1561  1.1  mrg static void
   1562  1.1  mrg maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
   1563  1.1  mrg {
   1564  1.1  mrg   const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
   1565  1.1  mrg   if (bidi::vec.count () > 0
   1566  1.1  mrg       && (warn_bidi & bidirectional_unpaired
   1567  1.1  mrg 	  && (!bidi::current_ctx_ucn_p ()
   1568  1.1  mrg 	      || (warn_bidi & bidirectional_ucn))))
   1569  1.1  mrg     {
   1570  1.1  mrg       const location_t loc
   1571  1.1  mrg 	= linemap_position_for_column (pfile->line_table,
   1572  1.1  mrg 				       CPP_BUF_COLUMN (pfile->buffer, p));
   1573  1.1  mrg       unpaired_bidi_rich_location rich_loc (pfile, loc);
   1574  1.1  mrg       /* cpp_callbacks doesn't yet have a way to handle singular vs plural
   1575  1.1  mrg 	 forms of a diagnostic, so fake it for now.  */
   1576  1.1  mrg       if (bidi::vec.count () > 1)
   1577  1.1  mrg 	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
   1578  1.1  mrg 			"unpaired UTF-8 bidirectional control characters "
   1579  1.1  mrg 			"detected");
   1580  1.1  mrg       else
   1581  1.1  mrg 	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
   1582  1.1  mrg 			"unpaired UTF-8 bidirectional control character "
   1583  1.1  mrg 			"detected");
   1584  1.1  mrg     }
   1585  1.1  mrg   /* We're done with this context.  */
   1586  1.1  mrg   bidi::on_close ();
   1587  1.1  mrg }
   1588  1.1  mrg 
   1589  1.1  mrg /* We're at the beginning or in the middle of an identifier/comment/string
   1590  1.1  mrg    literal/character constant.  Warn if we've encountered a bidi character.
   1591  1.1  mrg    KIND says which bidi control character it was; UCN_P is true iff this bidi
   1592  1.1  mrg    control character was written as a UCN.  LOC is the location of the
   1593  1.1  mrg    character, but is only valid if KIND != bidi::kind::NONE.  */
   1594  1.1  mrg 
   1595  1.1  mrg static void
   1596  1.1  mrg maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind,
   1597  1.1  mrg 			 bool ucn_p, location_t loc)
   1598  1.1  mrg {
   1599  1.1  mrg   if (__builtin_expect (kind == bidi::kind::NONE, 1))
   1600  1.1  mrg     return;
   1601  1.1  mrg 
   1602  1.1  mrg   const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
   1603  1.1  mrg 
   1604  1.1  mrg   if (warn_bidi & (bidirectional_unpaired|bidirectional_any))
   1605  1.1  mrg     {
   1606  1.1  mrg       rich_location rich_loc (pfile->line_table, loc);
   1607  1.1  mrg       rich_loc.set_escape_on_output (true);
   1608  1.1  mrg 
   1609  1.1  mrg       /* It seems excessive to warn about a PDI/PDF that is closing
   1610  1.1  mrg 	 an opened context because we've already warned about the
   1611  1.1  mrg 	 opening character.  Except warn when we have a UCN x UTF-8
   1612  1.1  mrg 	 mismatch, if UCN checking is enabled.  */
   1613  1.1  mrg       if (kind == bidi::current_ctx ())
   1614  1.1  mrg 	{
   1615  1.1  mrg 	  if (warn_bidi == (bidirectional_unpaired|bidirectional_ucn)
   1616  1.1  mrg 	      && bidi::current_ctx_ucn_p () != ucn_p)
   1617  1.1  mrg 	    {
   1618  1.1  mrg 	      rich_loc.add_range (bidi::current_ctx_loc ());
   1619  1.1  mrg 	      cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
   1620  1.1  mrg 			      "UTF-8 vs UCN mismatch when closing "
   1621  1.1  mrg 			      "a context by \"%s\"", bidi::to_str (kind));
   1622  1.1  mrg 	    }
   1623  1.1  mrg 	}
   1624  1.1  mrg       else if (warn_bidi & bidirectional_any
   1625  1.1  mrg 	       && (!ucn_p || (warn_bidi & bidirectional_ucn)))
   1626  1.1  mrg 	{
   1627  1.1  mrg 	  if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
   1628  1.1  mrg 	    cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
   1629  1.1  mrg 			    "\"%s\" is closing an unopened context",
   1630  1.1  mrg 			    bidi::to_str (kind));
   1631  1.1  mrg 	  else
   1632  1.1  mrg 	    cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
   1633  1.1  mrg 			    "found problematic Unicode character \"%s\"",
   1634  1.1  mrg 			    bidi::to_str (kind));
   1635  1.1  mrg 	}
   1636  1.1  mrg     }
   1637  1.1  mrg   /* We're done with this context.  */
   1638  1.1  mrg   bidi::on_char (kind, ucn_p, loc);
   1639  1.1  mrg }
   1640  1.1  mrg 
   1641  1.1  mrg /* Skip a C-style block comment.  We find the end of the comment by
   1642  1.1  mrg    seeing if an asterisk is before every '/' we encounter.  Returns
   1643  1.1  mrg    nonzero if comment terminated by EOF, zero otherwise.
   1644  1.1  mrg 
   1645  1.1  mrg    Buffer->cur points to the initial asterisk of the comment.  */
   1646  1.1  mrg bool
   1647  1.1  mrg _cpp_skip_block_comment (cpp_reader *pfile)
   1648  1.1  mrg {
   1649  1.1  mrg   cpp_buffer *buffer = pfile->buffer;
   1650  1.1  mrg   const uchar *cur = buffer->cur;
   1651  1.1  mrg   uchar c;
   1652  1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   1653  1.1  mrg 
   1654  1.1  mrg   cur++;
   1655  1.1  mrg   if (*cur == '/')
   1656  1.1  mrg     cur++;
   1657  1.1  mrg 
   1658  1.1  mrg   for (;;)
   1659  1.1  mrg     {
   1660  1.1  mrg       /* People like decorating comments with '*', so check for '/'
   1661  1.1  mrg 	 instead for efficiency.  */
   1662  1.1  mrg       c = *cur++;
   1663  1.1  mrg 
   1664  1.1  mrg       if (c == '/')
   1665  1.1  mrg 	{
   1666  1.1  mrg 	  if (cur[-2] == '*')
   1667  1.1  mrg 	    {
   1668  1.1  mrg 	      if (warn_bidi_p)
   1669  1.1  mrg 		maybe_warn_bidi_on_close (pfile, cur);
   1670  1.1  mrg 	      break;
   1671  1.1  mrg 	    }
   1672  1.1  mrg 
   1673  1.1  mrg 	  /* Warn about potential nested comments, but not if the '/'
   1674  1.1  mrg 	     comes immediately before the true comment delimiter.
   1675  1.1  mrg 	     Don't bother to get it right across escaped newlines.  */
   1676  1.1  mrg 	  if (CPP_OPTION (pfile, warn_comments)
   1677  1.1  mrg 	      && cur[0] == '*' && cur[1] != '/')
   1678  1.1  mrg 	    {
   1679  1.1  mrg 	      buffer->cur = cur;
   1680  1.1  mrg 	      cpp_warning_with_line (pfile, CPP_W_COMMENTS,
   1681  1.1  mrg 				     pfile->line_table->highest_line,
   1682  1.1  mrg 				     CPP_BUF_COL (buffer),
   1683  1.1  mrg 				     "\"/*\" within comment");
   1684  1.1  mrg 	    }
   1685  1.1  mrg 	}
   1686  1.1  mrg       else if (c == '\n')
   1687  1.1  mrg 	{
   1688  1.1  mrg 	  unsigned int cols;
   1689  1.1  mrg 	  buffer->cur = cur - 1;
   1690  1.1  mrg 	  if (warn_bidi_p)
   1691  1.1  mrg 	    maybe_warn_bidi_on_close (pfile, cur);
   1692  1.1  mrg 	  _cpp_process_line_notes (pfile, true);
   1693  1.1  mrg 	  if (buffer->next_line >= buffer->rlimit)
   1694  1.1  mrg 	    return true;
   1695  1.1  mrg 	  _cpp_clean_line (pfile);
   1696  1.1  mrg 
   1697  1.1  mrg 	  cols = buffer->next_line - buffer->line_base;
   1698  1.1  mrg 	  CPP_INCREMENT_LINE (pfile, cols);
   1699  1.1  mrg 
   1700  1.1  mrg 	  cur = buffer->cur;
   1701  1.1  mrg 	}
   1702  1.1  mrg       /* If this is a beginning of a UTF-8 encoding, it might be
   1703  1.1  mrg 	 a bidirectional control character.  */
   1704  1.1  mrg       else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
   1705  1.1  mrg 	{
   1706  1.1  mrg 	  location_t loc;
   1707  1.1  mrg 	  bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
   1708  1.1  mrg 	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
   1709  1.1  mrg 	}
   1710  1.1  mrg     }
   1711  1.1  mrg 
   1712  1.1  mrg   buffer->cur = cur;
   1713  1.1  mrg   _cpp_process_line_notes (pfile, true);
   1714  1.1  mrg   return false;
   1715  1.1  mrg }
   1716  1.1  mrg 
   1717  1.1  mrg /* Skip a C++ line comment, leaving buffer->cur pointing to the
   1718  1.1  mrg    terminating newline.  Handles escaped newlines.  Returns nonzero
   1719  1.1  mrg    if a multiline comment.  */
   1720  1.1  mrg static int
   1721  1.1  mrg skip_line_comment (cpp_reader *pfile)
   1722  1.1  mrg {
   1723  1.1  mrg   cpp_buffer *buffer = pfile->buffer;
   1724  1.1  mrg   location_t orig_line = pfile->line_table->highest_line;
   1725  1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   1726  1.1  mrg 
   1727  1.1  mrg   if (!warn_bidi_p)
   1728  1.1  mrg     while (*buffer->cur != '\n')
   1729  1.1  mrg       buffer->cur++;
   1730  1.1  mrg   else
   1731  1.1  mrg     {
   1732  1.1  mrg       while (*buffer->cur != '\n'
   1733  1.1  mrg 	     && *buffer->cur != bidi::utf8_start)
   1734  1.1  mrg 	buffer->cur++;
   1735  1.1  mrg       if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
   1736  1.1  mrg 	{
   1737  1.1  mrg 	  while (*buffer->cur != '\n')
   1738  1.1  mrg 	    {
   1739  1.1  mrg 	      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
   1740  1.1  mrg 		{
   1741  1.1  mrg 		  location_t loc;
   1742  1.1  mrg 		  bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
   1743  1.1  mrg 		  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
   1744  1.1  mrg 		}
   1745  1.1  mrg 	      buffer->cur++;
   1746  1.1  mrg 	    }
   1747  1.1  mrg 	  maybe_warn_bidi_on_close (pfile, buffer->cur);
   1748  1.1  mrg 	}
   1749  1.1  mrg     }
   1750  1.1  mrg 
   1751  1.1  mrg   _cpp_process_line_notes (pfile, true);
   1752  1.1  mrg   return orig_line != pfile->line_table->highest_line;
   1753  1.1  mrg }
   1754  1.1  mrg 
   1755  1.1  mrg /* Skips whitespace, saving the next non-whitespace character.  */
   1756  1.1  mrg static void
   1757  1.1  mrg skip_whitespace (cpp_reader *pfile, cppchar_t c)
   1758  1.1  mrg {
   1759  1.1  mrg   cpp_buffer *buffer = pfile->buffer;
   1760  1.1  mrg   bool saw_NUL = false;
   1761  1.1  mrg 
   1762  1.1  mrg   do
   1763  1.1  mrg     {
   1764  1.1  mrg       /* Horizontal space always OK.  */
   1765  1.1  mrg       if (c == ' ' || c == '\t')
   1766  1.1  mrg 	;
   1767  1.1  mrg       /* Just \f \v or \0 left.  */
   1768  1.1  mrg       else if (c == '\0')
   1769  1.1  mrg 	saw_NUL = true;
   1770  1.1  mrg       else if (pfile->state.in_directive && CPP_PEDANTIC (pfile))
   1771  1.1  mrg 	cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line,
   1772  1.1  mrg 			     CPP_BUF_COL (buffer),
   1773  1.1  mrg 			     "%s in preprocessing directive",
   1774  1.1  mrg 			     c == '\f' ? "form feed" : "vertical tab");
   1775  1.1  mrg 
   1776  1.1  mrg       c = *buffer->cur++;
   1777  1.1  mrg     }
   1778  1.1  mrg   /* We only want non-vertical space, i.e. ' ' \t \f \v \0.  */
   1779  1.1  mrg   while (is_nvspace (c));
   1780  1.1  mrg 
   1781  1.1  mrg   if (saw_NUL)
   1782  1.1  mrg     {
   1783  1.1  mrg       encoding_rich_location rich_loc (pfile);
   1784  1.1  mrg       cpp_error_at (pfile, CPP_DL_WARNING, &rich_loc,
   1785  1.1  mrg 		    "null character(s) ignored");
   1786  1.1  mrg     }
   1787  1.1  mrg 
   1788  1.1  mrg   buffer->cur--;
   1789  1.1  mrg }
   1790  1.1  mrg 
   1791  1.1  mrg /* See if the characters of a number token are valid in a name (no
   1792  1.1  mrg    '.', '+' or '-').  */
   1793  1.1  mrg static int
   1794  1.1  mrg name_p (cpp_reader *pfile, const cpp_string *string)
   1795  1.1  mrg {
   1796  1.1  mrg   unsigned int i;
   1797  1.1  mrg 
   1798  1.1  mrg   for (i = 0; i < string->len; i++)
   1799  1.1  mrg     if (!is_idchar (string->text[i]))
   1800  1.1  mrg       return 0;
   1801  1.1  mrg 
   1802  1.1  mrg   return 1;
   1803  1.1  mrg }
   1804  1.1  mrg 
   1805  1.1  mrg /* After parsing an identifier or other sequence, produce a warning about
   1806  1.1  mrg    sequences not in NFC/NFKC.  */
   1807  1.1  mrg static void
   1808  1.1  mrg warn_about_normalization (cpp_reader *pfile,
   1809  1.1  mrg 			  const cpp_token *token,
   1810  1.1  mrg 			  const struct normalize_state *s)
   1811  1.1  mrg {
   1812  1.1  mrg   if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
   1813  1.1  mrg       && !pfile->state.skipping)
   1814  1.1  mrg     {
   1815  1.1  mrg       location_t loc = token->src_loc;
   1816  1.1  mrg 
   1817  1.1  mrg       /* If possible, create a location range for the token.  */
   1818  1.1  mrg       if (loc >= RESERVED_LOCATION_COUNT
   1819  1.1  mrg 	  && token->type != CPP_EOF
   1820  1.1  mrg 	  /* There must be no line notes to process.  */
   1821  1.1  mrg 	  && (!(pfile->buffer->cur
   1822  1.1  mrg 		>= pfile->buffer->notes[pfile->buffer->cur_note].pos
   1823  1.1  mrg 		&& !pfile->overlaid_buffer)))
   1824  1.1  mrg 	{
   1825  1.1  mrg 	  source_range tok_range;
   1826  1.1  mrg 	  tok_range.m_start = loc;
   1827  1.1  mrg 	  tok_range.m_finish
   1828  1.1  mrg 	    = linemap_position_for_column (pfile->line_table,
   1829  1.1  mrg 					   CPP_BUF_COLUMN (pfile->buffer,
   1830  1.1  mrg 							   pfile->buffer->cur));
   1831  1.1  mrg 	  loc = COMBINE_LOCATION_DATA (pfile->line_table,
   1832  1.1  mrg 				       loc, tok_range, NULL);
   1833  1.1  mrg 	}
   1834  1.1  mrg 
   1835  1.1  mrg       encoding_rich_location rich_loc (pfile, loc);
   1836  1.1  mrg 
   1837  1.1  mrg       /* Make sure that the token is printed using UCNs, even
   1838  1.1  mrg 	 if we'd otherwise happily print UTF-8.  */
   1839  1.1  mrg       unsigned char *buf = XNEWVEC (unsigned char, cpp_token_len (token));
   1840  1.1  mrg       size_t sz;
   1841  1.1  mrg 
   1842  1.1  mrg       sz = cpp_spell_token (pfile, token, buf, false) - buf;
   1843  1.1  mrg       if (NORMALIZE_STATE_RESULT (s) == normalized_C)
   1844  1.1  mrg 	cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
   1845  1.1  mrg 			"`%.*s' is not in NFKC", (int) sz, buf);
   1846  1.1  mrg       else if (CPP_OPTION (pfile, cplusplus))
   1847  1.1  mrg 	cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
   1848  1.1  mrg 				  "`%.*s' is not in NFC", (int) sz, buf);
   1849  1.1  mrg       else
   1850  1.1  mrg 	cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
   1851  1.1  mrg 			"`%.*s' is not in NFC", (int) sz, buf);
   1852  1.1  mrg       free (buf);
   1853  1.1  mrg     }
   1854  1.1  mrg }
   1855  1.1  mrg 
   1856  1.1  mrg static const cppchar_t utf8_signifier = 0xC0;
   1857  1.1  mrg 
   1858  1.1  mrg /* Returns TRUE if the sequence starting at buffer->cur is valid in
   1859  1.1  mrg    an identifier.  FIRST is TRUE if this starts an identifier.  */
   1860  1.1  mrg 
   1861  1.1  mrg static bool
   1862  1.1  mrg forms_identifier_p (cpp_reader *pfile, int first,
   1863  1.1  mrg 		    struct normalize_state *state)
   1864  1.1  mrg {
   1865  1.1  mrg   cpp_buffer *buffer = pfile->buffer;
   1866  1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   1867  1.1  mrg 
   1868  1.1  mrg   if (*buffer->cur == '$')
   1869  1.1  mrg     {
   1870  1.1  mrg       if (!CPP_OPTION (pfile, dollars_in_ident))
   1871  1.1  mrg 	return false;
   1872  1.1  mrg 
   1873  1.1  mrg       buffer->cur++;
   1874  1.1  mrg       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
   1875  1.1  mrg 	{
   1876  1.1  mrg 	  CPP_OPTION (pfile, warn_dollars) = 0;
   1877  1.1  mrg 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
   1878  1.1  mrg 	}
   1879  1.1  mrg 
   1880  1.1  mrg       return true;
   1881  1.1  mrg     }
   1882  1.1  mrg 
   1883  1.1  mrg   /* Is this a syntactically valid UCN or a valid UTF-8 char?  */
   1884  1.1  mrg   if (CPP_OPTION (pfile, extended_identifiers))
   1885  1.1  mrg     {
   1886  1.1  mrg       cppchar_t s;
   1887  1.1  mrg       if (*buffer->cur >= utf8_signifier)
   1888  1.1  mrg 	{
   1889  1.1  mrg 	  if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0)
   1890  1.1  mrg 	      && warn_bidi_p)
   1891  1.1  mrg 	    {
   1892  1.1  mrg 	      location_t loc;
   1893  1.1  mrg 	      bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
   1894  1.1  mrg 	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
   1895  1.1  mrg 	    }
   1896  1.1  mrg 	  if (_cpp_valid_utf8 (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
   1897  1.1  mrg 			       state, &s))
   1898  1.1  mrg 	    return true;
   1899  1.1  mrg 	}
   1900  1.1  mrg       else if (*buffer->cur == '\\'
   1901  1.1  mrg 	       && (buffer->cur[1] == 'u' || buffer->cur[1] == 'U'))
   1902  1.1  mrg 	{
   1903  1.1  mrg 	  buffer->cur += 2;
   1904  1.1  mrg 	  if (warn_bidi_p)
   1905  1.1  mrg 	    {
   1906  1.1  mrg 	      location_t loc;
   1907  1.1  mrg 	      bidi::kind kind = get_bidi_ucn (pfile,
   1908  1.1  mrg 					      buffer->cur,
   1909  1.1  mrg 					      buffer->cur[-1] == 'U',
   1910  1.1  mrg 					      &loc);
   1911  1.1  mrg 	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
   1912  1.1  mrg 	    }
   1913  1.1  mrg 	  if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
   1914  1.1  mrg 			      state, &s, NULL, NULL))
   1915  1.1  mrg 	    return true;
   1916  1.1  mrg 	  buffer->cur -= 2;
   1917  1.1  mrg 	}
   1918  1.1  mrg     }
   1919  1.1  mrg 
   1920  1.1  mrg   return false;
   1921  1.1  mrg }
   1922  1.1  mrg 
   1923  1.1  mrg /* Helper function to issue error about improper __VA_OPT__ use.  */
   1924  1.1  mrg static void
   1925  1.1  mrg maybe_va_opt_error (cpp_reader *pfile)
   1926  1.1  mrg {
   1927  1.1  mrg   if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, va_opt))
   1928  1.1  mrg     {
   1929  1.1  mrg       /* __VA_OPT__ should not be accepted at all, but allow it in
   1930  1.1  mrg 	 system headers.  */
   1931  1.1  mrg       if (!_cpp_in_system_header (pfile))
   1932  1.1  mrg 	cpp_error (pfile, CPP_DL_PEDWARN,
   1933  1.1  mrg 		   "__VA_OPT__ is not available until C++20");
   1934  1.1  mrg     }
   1935  1.1  mrg   else if (!pfile->state.va_args_ok)
   1936  1.1  mrg     {
   1937  1.1  mrg       /* __VA_OPT__ should only appear in the replacement list of a
   1938  1.1  mrg 	 variadic macro.  */
   1939  1.1  mrg       cpp_error (pfile, CPP_DL_PEDWARN,
   1940  1.1  mrg 		 "__VA_OPT__ can only appear in the expansion"
   1941  1.1  mrg 		 " of a C++20 variadic macro");
   1942  1.1  mrg     }
   1943  1.1  mrg }
   1944  1.1  mrg 
   1945  1.1  mrg /* Helper function to get the cpp_hashnode of the identifier BASE.  */
   1946  1.1  mrg static cpp_hashnode *
   1947  1.1  mrg lex_identifier_intern (cpp_reader *pfile, const uchar *base)
   1948  1.1  mrg {
   1949  1.1  mrg   cpp_hashnode *result;
   1950  1.1  mrg   const uchar *cur;
   1951  1.1  mrg   unsigned int len;
   1952  1.1  mrg   unsigned int hash = HT_HASHSTEP (0, *base);
   1953  1.1  mrg 
   1954  1.1  mrg   cur = base + 1;
   1955  1.1  mrg   while (ISIDNUM (*cur))
   1956  1.1  mrg     {
   1957  1.1  mrg       hash = HT_HASHSTEP (hash, *cur);
   1958  1.1  mrg       cur++;
   1959  1.1  mrg     }
   1960  1.1  mrg   len = cur - base;
   1961  1.1  mrg   hash = HT_HASHFINISH (hash, len);
   1962  1.1  mrg   result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
   1963  1.1  mrg 					      base, len, hash, HT_ALLOC));
   1964  1.1  mrg 
   1965  1.1  mrg   /* Rarely, identifiers require diagnostics when lexed.  */
   1966  1.1  mrg   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
   1967  1.1  mrg 			&& !pfile->state.skipping, 0))
   1968  1.1  mrg     {
   1969  1.1  mrg       /* It is allowed to poison the same identifier twice.  */
   1970  1.1  mrg       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
   1971  1.1  mrg 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
   1972  1.1  mrg 		   NODE_NAME (result));
   1973  1.1  mrg 
   1974  1.1  mrg       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
   1975  1.1  mrg 	 replacement list of a variadic macro.  */
   1976  1.1  mrg       if (result == pfile->spec_nodes.n__VA_ARGS__
   1977  1.1  mrg 	  && !pfile->state.va_args_ok)
   1978  1.1  mrg 	{
   1979  1.1  mrg 	  if (CPP_OPTION (pfile, cplusplus))
   1980  1.1  mrg 	    cpp_error (pfile, CPP_DL_PEDWARN,
   1981  1.1  mrg 		       "__VA_ARGS__ can only appear in the expansion"
   1982  1.1  mrg 		       " of a C++11 variadic macro");
   1983  1.1  mrg 	  else
   1984  1.1  mrg 	    cpp_error (pfile, CPP_DL_PEDWARN,
   1985  1.1  mrg 		       "__VA_ARGS__ can only appear in the expansion"
   1986  1.1  mrg 		       " of a C99 variadic macro");
   1987  1.1  mrg 	}
   1988  1.1  mrg 
   1989  1.1  mrg       if (result == pfile->spec_nodes.n__VA_OPT__)
   1990  1.1  mrg 	maybe_va_opt_error (pfile);
   1991  1.1  mrg 
   1992  1.1  mrg       /* For -Wc++-compat, warn about use of C++ named operators.  */
   1993  1.1  mrg       if (result->flags & NODE_WARN_OPERATOR)
   1994  1.1  mrg 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
   1995  1.1  mrg 		     "identifier \"%s\" is a special operator name in C++",
   1996  1.1  mrg 		     NODE_NAME (result));
   1997  1.1  mrg     }
   1998  1.1  mrg 
   1999  1.1  mrg   return result;
   2000  1.1  mrg }
   2001  1.1  mrg 
   2002  1.1  mrg /* Get the cpp_hashnode of an identifier specified by NAME in
   2003  1.1  mrg    the current cpp_reader object.  If none is found, NULL is returned.  */
   2004  1.1  mrg cpp_hashnode *
   2005  1.1  mrg _cpp_lex_identifier (cpp_reader *pfile, const char *name)
   2006  1.1  mrg {
   2007  1.1  mrg   cpp_hashnode *result;
   2008  1.1  mrg   result = lex_identifier_intern (pfile, (uchar *) name);
   2009  1.1  mrg   return result;
   2010  1.1  mrg }
   2011  1.1  mrg 
   2012  1.1  mrg /* Lex an identifier starting at BUFFER->CUR - 1.  */
   2013  1.1  mrg static cpp_hashnode *
   2014  1.1  mrg lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
   2015  1.1  mrg 		struct normalize_state *nst, cpp_hashnode **spelling)
   2016  1.1  mrg {
   2017  1.1  mrg   cpp_hashnode *result;
   2018  1.1  mrg   const uchar *cur;
   2019  1.1  mrg   unsigned int len;
   2020  1.1  mrg   unsigned int hash = HT_HASHSTEP (0, *base);
   2021  1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   2022  1.1  mrg 
   2023  1.1  mrg   cur = pfile->buffer->cur;
   2024  1.1  mrg   if (! starts_ucn)
   2025  1.1  mrg     {
   2026  1.1  mrg       while (ISIDNUM (*cur))
   2027  1.1  mrg 	{
   2028  1.1  mrg 	  hash = HT_HASHSTEP (hash, *cur);
   2029  1.1  mrg 	  cur++;
   2030  1.1  mrg 	}
   2031  1.1  mrg       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1));
   2032  1.1  mrg     }
   2033  1.1  mrg   pfile->buffer->cur = cur;
   2034  1.1  mrg   if (starts_ucn || forms_identifier_p (pfile, false, nst))
   2035  1.1  mrg     {
   2036  1.1  mrg       /* Slower version for identifiers containing UCNs
   2037  1.1  mrg 	 or extended chars (including $).  */
   2038  1.1  mrg       do {
   2039  1.1  mrg 	while (ISIDNUM (*pfile->buffer->cur))
   2040  1.1  mrg 	  {
   2041  1.1  mrg 	    NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
   2042  1.1  mrg 	    pfile->buffer->cur++;
   2043  1.1  mrg 	  }
   2044  1.1  mrg       } while (forms_identifier_p (pfile, false, nst));
   2045  1.1  mrg       if (warn_bidi_p)
   2046  1.1  mrg 	maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
   2047  1.1  mrg       result = _cpp_interpret_identifier (pfile, base,
   2048  1.1  mrg 					  pfile->buffer->cur - base);
   2049  1.1  mrg       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
   2050  1.1  mrg     }
   2051  1.1  mrg   else
   2052  1.1  mrg     {
   2053  1.1  mrg       len = cur - base;
   2054  1.1  mrg       hash = HT_HASHFINISH (hash, len);
   2055  1.1  mrg 
   2056  1.1  mrg       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
   2057  1.1  mrg 						  base, len, hash, HT_ALLOC));
   2058  1.1  mrg       *spelling = result;
   2059  1.1  mrg     }
   2060  1.1  mrg 
   2061  1.1  mrg   /* Rarely, identifiers require diagnostics when lexed.  */
   2062  1.1  mrg   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
   2063  1.1  mrg 			&& !pfile->state.skipping, 0))
   2064  1.1  mrg     {
   2065  1.1  mrg       /* It is allowed to poison the same identifier twice.  */
   2066  1.1  mrg       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
   2067  1.1  mrg 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
   2068  1.1  mrg 		   NODE_NAME (result));
   2069  1.1  mrg 
   2070  1.1  mrg       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
   2071  1.1  mrg 	 replacement list of a variadic macro.  */
   2072  1.1  mrg       if (result == pfile->spec_nodes.n__VA_ARGS__
   2073  1.1  mrg 	  && !pfile->state.va_args_ok)
   2074  1.1  mrg 	{
   2075  1.1  mrg 	  if (CPP_OPTION (pfile, cplusplus))
   2076  1.1  mrg 	    cpp_error (pfile, CPP_DL_PEDWARN,
   2077  1.1  mrg 		       "__VA_ARGS__ can only appear in the expansion"
   2078  1.1  mrg 		       " of a C++11 variadic macro");
   2079  1.1  mrg 	  else
   2080  1.1  mrg 	    cpp_error (pfile, CPP_DL_PEDWARN,
   2081  1.1  mrg 		       "__VA_ARGS__ can only appear in the expansion"
   2082  1.1  mrg 		       " of a C99 variadic macro");
   2083  1.1  mrg 	}
   2084  1.1  mrg 
   2085  1.1  mrg       /* __VA_OPT__ should only appear in the replacement list of a
   2086  1.1  mrg 	 variadic macro.  */
   2087  1.1  mrg       if (result == pfile->spec_nodes.n__VA_OPT__)
   2088  1.1  mrg 	maybe_va_opt_error (pfile);
   2089  1.1  mrg 
   2090  1.1  mrg       /* For -Wc++-compat, warn about use of C++ named operators.  */
   2091  1.1  mrg       if (result->flags & NODE_WARN_OPERATOR)
   2092  1.1  mrg 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
   2093  1.1  mrg 		     "identifier \"%s\" is a special operator name in C++",
   2094  1.1  mrg 		     NODE_NAME (result));
   2095  1.1  mrg     }
   2096  1.1  mrg 
   2097  1.1  mrg   return result;
   2098  1.1  mrg }
   2099  1.1  mrg 
   2100  1.1  mrg /* Lex a pp-number starting at BUFFER->CUR - 1, advance BUFFER->CUR past it, and store a NUL-terminated copy of its spelling in NUMBER.  NST is passed to NORMALIZE_STATE_UPDATE_IDNUM for each character consumed.  */
   2101  1.1  mrg static void
   2102  1.1  mrg lex_number (cpp_reader *pfile, cpp_string *number,
   2103  1.1  mrg 	    struct normalize_state *nst)
   2104  1.1  mrg {
   2105  1.1  mrg   const uchar *cur;
   2106  1.1  mrg   const uchar *base;
   2107  1.1  mrg   uchar *dest;
   2108  1.1  mrg 
   2109  1.1  mrg   base = pfile->buffer->cur - 1;
   2110  1.1  mrg   do
   2111  1.1  mrg     {
   2112  1.1  mrg       const uchar *adj_digit_sep = NULL;
   2113  1.1  mrg       cur = pfile->buffer->cur;
   2114  1.1  mrg 
   2115  1.1  mrg       /* N.B. ISIDNUM does not include $.  */
   2116  1.1  mrg       while (ISIDNUM (*cur)
   2117  1.1  mrg 	     || (*cur == '.' && !DIGIT_SEP (cur[-1]))
   2118  1.1  mrg 	     || DIGIT_SEP (*cur)
   2119  1.1  mrg 	     || (VALID_SIGN (*cur, cur[-1]) && !DIGIT_SEP (cur[-2])))
   2120  1.1  mrg 	{
   2121  1.1  mrg 	  NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
   2122  1.1  mrg 	  /* Adjacent digit separators do not form part of the pp-number syntax.
   2123  1.1  mrg 	     However, they can safely be diagnosed here as an error, since '' is
   2124  1.1  mrg 	     not a valid preprocessing token.  Only the first such pair is recorded.  */
   2125  1.1  mrg 	  if (DIGIT_SEP (*cur) && DIGIT_SEP (cur[-1]) && !adj_digit_sep)
   2126  1.1  mrg 	    adj_digit_sep = cur;
   2127  1.1  mrg 	  cur++;
   2128  1.1  mrg 	}
   2129  1.1  mrg       /* A number can't end with a digit separator, so back up over any trailing ones.  */
   2130  1.1  mrg       while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
   2131  1.1  mrg 	--cur;
   2132  1.1  mrg       if (adj_digit_sep && adj_digit_sep < cur)
   2133  1.1  mrg 	cpp_error (pfile, CPP_DL_ERROR, "adjacent digit separators");
   2134  1.1  mrg 
   2135  1.1  mrg       pfile->buffer->cur = cur;
   2136  1.1  mrg     }
   2137  1.1  mrg   while (forms_identifier_p (pfile, false, nst));
   2138  1.1  mrg 
   2139  1.1  mrg   number->len = cur - base;
   2140  1.1  mrg   dest = _cpp_unaligned_alloc (pfile, number->len + 1);  /* +1 for the NUL stored below.  */
   2141  1.1  mrg   memcpy (dest, base, number->len);
   2142  1.1  mrg   dest[number->len] = '\0';
   2143  1.1  mrg   number->text = dest;
   2144  1.1  mrg }
   2145  1.1  mrg 
   2146  1.1  mrg /* Make TOKEN a token of type TYPE whose literal spelling is a copy, made by cpp_alloc_token_string, of the LEN bytes at BASE.  */
   2147  1.1  mrg static void
   2148  1.1  mrg create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
   2149  1.1  mrg 		unsigned int len, enum cpp_ttype type)
   2150  1.1  mrg {
   2151  1.1  mrg   token->type = type;
   2152  1.1  mrg   token->val.str.len = len;
   2153  1.1  mrg   token->val.str.text = cpp_alloc_token_string (pfile, base, len);
   2154  1.1  mrg }
   2155  1.1  mrg 
   2156  1.1  mrg const uchar *
   2157  1.1  mrg cpp_alloc_token_string (cpp_reader *pfile,
   2158  1.1  mrg 			const unsigned char *ptr, unsigned len)
   2159  1.1  mrg {
   2160  1.1  mrg   uchar *dest = _cpp_unaligned_alloc (pfile, len + 1);
   2161  1.1  mrg   /* Return a copy of the LEN bytes at PTR followed by a NUL, in memory obtained from _cpp_unaligned_alloc.  */
   2162  1.1  mrg   dest[len] = 0;
   2163  1.1  mrg   memcpy (dest, ptr, len);
   2164  1.1  mrg   return dest;
   2165  1.1  mrg }
   2166  1.1  mrg 
   2167  1.1  mrg /* Accumulator used for string literal lexing: a sequence of buffers from FIRST to LAST (the one currently
   2168  1.1  mrg    being filled), a read position RPOS (NULL when not reading), and ACCUM, the count of bytes appended.  */
   2169  1.1  mrg struct lit_accum {
   2170  1.1  mrg   _cpp_buff *first;
   2171  1.1  mrg   _cpp_buff *last;
   2172  1.1  mrg   const uchar *rpos;
   2173  1.1  mrg   size_t accum;
   2174  1.1  mrg 
   2175  1.1  mrg   lit_accum ()
   2176  1.1  mrg     : first (NULL), last (NULL), rpos (0), accum (0)
   2177  1.1  mrg   {
   2178  1.1  mrg   }
   2179  1.1  mrg 
   2180  1.1  mrg   void append (cpp_reader *, const uchar *, size_t);
   2181  1.1  mrg 
   2182  1.1  mrg   void read_begin (cpp_reader *);
   2183  1.1  mrg   bool reading_p () const  /* True while read_char still has bytes to hand back.  */
   2184  1.1  mrg   {
   2185  1.1  mrg     return rpos != NULL;
   2186  1.1  mrg   }
   2187  1.1  mrg   char read_char ()  /* Return the byte at RPOS and step past it.  */
   2188  1.1  mrg   {
   2189  1.1  mrg     char c = *rpos++;
   2190  1.1  mrg     if (rpos == BUFF_FRONT (last))  /* Reached the end of what was appended: stop reading.  */
   2191  1.1  mrg       rpos = NULL;
   2192  1.1  mrg     return c;
   2193  1.1  mrg   }
   2194  1.1  mrg };
   2195  1.1  mrg 
   2196  1.1  mrg /* Subroutine of lex_raw_string: Append LEN chars from BASE to the buffer
   2197  1.1  mrg    sequence from FIRST to LAST, and add LEN to ACCUM.  */
   2198  1.1  mrg 
   2199  1.1  mrg void
   2200  1.1  mrg lit_accum::append (cpp_reader *pfile, const uchar *base, size_t len)
   2201  1.1  mrg {
   2202  1.1  mrg   if (!last)
   2203  1.1  mrg     /* Starting.  */
   2204  1.1  mrg     first = last = _cpp_get_buff (pfile, len);
   2205  1.1  mrg   else if (len > BUFF_ROOM (last))
   2206  1.1  mrg     {
   2207  1.1  mrg       /* There is insufficient room in the buffer.  Copy what we can,
   2208  1.1  mrg 	 and then either extend or create a new one.  */
   2209  1.1  mrg       size_t room = BUFF_ROOM (last);
   2210  1.1  mrg       memcpy (BUFF_FRONT (last), base, room);
   2211  1.1  mrg       BUFF_FRONT (last) += room;
   2212  1.1  mrg       base += room;
   2213  1.1  mrg       len -= room;
   2214  1.1  mrg       accum += room;
   2215  1.1  mrg 
   2216  1.1  mrg       gcc_checking_assert (!rpos);  /* LAST is about to be replaced, so no read may be in progress.  */
   2217  1.1  mrg 
   2218  1.1  mrg       last = _cpp_append_extend_buff (pfile, last, len);
   2219  1.1  mrg     }
   2220  1.1  mrg 
   2221  1.1  mrg   memcpy (BUFF_FRONT (last), base, len);  /* Copy whatever remains into LAST.  */
   2222  1.1  mrg   BUFF_FRONT (last) += len;
   2223  1.1  mrg   accum += len;
   2224  1.1  mrg }
   2225  1.1  mrg 
   2226  1.1  mrg void
   2227  1.1  mrg lit_accum::read_begin (cpp_reader *pfile)
   2228  1.1  mrg {
   2229  1.1  mrg   /* Start reading at the current front of LAST.  We never accumulate more than 4 chars to read, so first make sure LAST has that much room.  */
   2230  1.1  mrg   if (BUFF_ROOM (last) < 4)
   2231  1.1  mrg 
   2232  1.1  mrg     last = _cpp_append_extend_buff (pfile, last, 4);
   2233  1.1  mrg   rpos = BUFF_FRONT (last);
   2234  1.1  mrg }
   2235  1.1  mrg 
   2236  1.1  mrg /* Returns true if the identifier starting at BASE names a defined macro, and false if BASE does not start an identifier.
   2237  1.1  mrg    This might not work if compiling with -save-temps,
   2238  1.1  mrg    or preprocessing separately from compilation.  */
   2239  1.1  mrg 
   2240  1.1  mrg static bool
   2241  1.1  mrg is_macro(cpp_reader *pfile, const uchar *base)
   2242  1.1  mrg {
   2243  1.1  mrg   const uchar *cur = base;
   2244  1.1  mrg   if (! ISIDST (*cur))
   2245  1.1  mrg     return false;
   2246  1.1  mrg   unsigned int hash = HT_HASHSTEP (0, *cur);
   2247  1.1  mrg   ++cur;
   2248  1.1  mrg   while (ISIDNUM (*cur))
   2249  1.1  mrg     {
   2250  1.1  mrg       hash = HT_HASHSTEP (hash, *cur);
   2251  1.1  mrg       ++cur;
   2252  1.1  mrg     }
   2253  1.1  mrg   hash = HT_HASHFINISH (hash, cur - base);
   2254  1.1  mrg 
   2255  1.1  mrg   cpp_hashnode *result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
   2256  1.1  mrg 					base, cur - base, hash, HT_NO_INSERT));
   2257  1.1  mrg 
   2258  1.1  mrg   return result && cpp_macro_p (result);
   2259  1.1  mrg }
   2260  1.1  mrg 
   2261  1.1  mrg /* Returns true if the text at BASE does not have the expected form of a
   2262  1.1  mrg    user-defined literal suffix and is defined as a macro.  */
   2263  1.1  mrg 
   2264  1.1  mrg static bool
   2265  1.1  mrg is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base)
   2266  1.1  mrg {
   2267  1.1  mrg   /* User-defined literals outside of namespace std must start with a single
   2268  1.1  mrg      underscore, so assume anything of that form really is a UDL suffix.
   2269  1.1  mrg      We don't need to worry about UDLs defined inside namespace std because
   2270  1.1  mrg      their names are reserved, so cannot be used as macro names in valid
   2271  1.1  mrg      programs.  */
   2272  1.1  mrg   if (base[0] == '_' && base[1] != '_')
   2273  1.1  mrg     return false;
   2274  1.1  mrg   return is_macro (pfile, base);
   2275  1.1  mrg }
   2276  1.1  mrg 
   2277  1.1  mrg /* Lexes a raw string.  The stored string contains the spelling,
   2278  1.1  mrg    including double quotes, delimiter string, '(' and ')', any leading
   2279  1.1  mrg    'L', 'u', 'U' or 'u8' and 'R' modifier.  The created token contains
   2280  1.1  mrg    the type of the literal, CPP_OTHER if it was malformed or not properly
   2281  1.1  mrg    terminated, or CPP_EOF if the input ran out inside the string.
   2282  1.1  mrg 
   2283  1.1  mrg    BASE is the start of the token.  Updates pfile->buffer->cur to just
   2284  1.1  mrg    after the lexed string.
   2285  1.1  mrg 
   2286  1.1  mrg    The spelling is NUL-terminated, but it is not guaranteed that this
   2287  1.1  mrg    is the first NUL since embedded NULs are preserved.  */
   2288  1.1  mrg 
   2289  1.1  mrg static void
   2290  1.1  mrg lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
   2291  1.1  mrg {
   2292  1.1  mrg   const uchar *pos = base;
   2293  1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   2294  1.1  mrg 
   2295  1.1  mrg   /* 'tis a pity this information isn't passed down from the lexer's
   2296  1.1  mrg      initial categorization of the token.  */
   2297  1.1  mrg   enum cpp_ttype type = CPP_STRING;
   2298  1.1  mrg 
   2299  1.1  mrg   if (*pos == 'L')
   2300  1.1  mrg     {
   2301  1.1  mrg       type = CPP_WSTRING;
   2302  1.1  mrg       pos++;
   2303  1.1  mrg     }
   2304  1.1  mrg   else if (*pos == 'U')
   2305  1.1  mrg     {
   2306  1.1  mrg       type = CPP_STRING32;
   2307  1.1  mrg       pos++;
   2308  1.1  mrg     }
   2309  1.1  mrg   else if (*pos == 'u')
   2310  1.1  mrg     {
   2311  1.1  mrg       if (pos[1] == '8')
   2312  1.1  mrg 	{
   2313  1.1  mrg 	  type = CPP_UTF8STRING;
   2314  1.1  mrg 	  pos++;
   2315  1.1  mrg 	}
   2316  1.1  mrg       else
   2317  1.1  mrg 	type = CPP_STRING16;
   2318  1.1  mrg       pos++;
   2319  1.1  mrg     }
   2320  1.1  mrg 
   2321  1.1  mrg   gcc_checking_assert (pos[0] == 'R' && pos[1] == '"');
   2322  1.1  mrg   pos += 2;
   2323  1.1  mrg 
   2324  1.1  mrg   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
   2325  1.1  mrg 
   2326  1.1  mrg   /* Skip notes before the ".  */
   2327  1.1  mrg   while (note->pos < pos)
   2328  1.1  mrg     ++note;
   2329  1.1  mrg 
   2330  1.1  mrg   lit_accum accum;
   2331  1.1  mrg 
   2332  1.1  mrg   uchar prefix[17];  /* Up to 16 delimiter chars plus the closing '"'.  */
   2333  1.1  mrg   unsigned prefix_len = 0;
   2334  1.1  mrg   enum Phase
   2335  1.1  mrg   {
   2336  1.1  mrg    PHASE_PREFIX = -2,  /* Collecting the delimiter, up to the '('.  */
   2337  1.1  mrg    PHASE_NONE = -1,  /* Not currently matching delimiter characters.  */
   2338  1.1  mrg    PHASE_SUFFIX = 0  /* After a ')': values >= 0 index the next PREFIX char to match.  */
   2339  1.1  mrg   } phase = PHASE_PREFIX;
   2340  1.1  mrg 
   2341  1.1  mrg   for (;;)
   2342  1.1  mrg     {
   2343  1.1  mrg       gcc_checking_assert (note->pos >= pos);
   2344  1.1  mrg 
   2345  1.1  mrg       /* Undo any escaped newlines and trigraphs.  */
   2346  1.1  mrg       if (!accum.reading_p () && note->pos == pos)
   2347  1.1  mrg 	switch (note->type)
   2348  1.1  mrg 	  {
   2349  1.1  mrg 	  case '\\':
   2350  1.1  mrg 	  case ' ':
   2351  1.1  mrg 	    /* Restore backslash followed by newline.  */
   2352  1.1  mrg 	    accum.append (pfile, base, pos - base);
   2353  1.1  mrg 	    base = pos;
   2354  1.1  mrg 	    accum.read_begin (pfile);
   2355  1.1  mrg 	    accum.append (pfile, UC"\\", 1);
   2356  1.1  mrg 
   2357  1.1  mrg 	  after_backslash:
   2358  1.1  mrg 	    if (note->type == ' ')
   2359  1.1  mrg 	      /* GNU backslash whitespace newline extension.  FIXME
   2360  1.1  mrg 		 could be any sequence of non-vertical space.  When we
   2361  1.1  mrg 		 can properly restore any such sequence, we should
   2362  1.1  mrg 		 mark this note as handled so _cpp_process_line_notes
   2363  1.1  mrg 		 doesn't warn.  */
   2364  1.1  mrg 	      accum.append (pfile, UC" ", 1);
   2365  1.1  mrg 
   2366  1.1  mrg 	    accum.append (pfile, UC"\n", 1);
   2367  1.1  mrg 	    note++;
   2368  1.1  mrg 	    break;
   2369  1.1  mrg 
   2370  1.1  mrg 	  case '\n':
   2371  1.1  mrg 	    /* This can happen for ??/<NEWLINE> when trigraphs are not
   2372  1.1  mrg 	       being interpreted.  */
   2373  1.1  mrg 	    gcc_checking_assert (!CPP_OPTION (pfile, trigraphs));
   2374  1.1  mrg 	    note->type = 0;
   2375  1.1  mrg 	    note++;
   2376  1.1  mrg 	    break;
   2377  1.1  mrg 
   2378  1.1  mrg 	  default:
   2379  1.1  mrg 	    gcc_checking_assert (_cpp_trigraph_map[note->type]);
   2380  1.1  mrg 
   2381  1.1  mrg 	    /* Don't warn about this trigraph in
   2382  1.1  mrg 	       _cpp_process_line_notes, since trigraphs show up as
   2383  1.1  mrg 	       trigraphs in raw strings.  */
   2384  1.1  mrg 	    uchar type = note->type;
   2385  1.1  mrg 	    note->type = 0;
   2386  1.1  mrg 
   2387  1.1  mrg 	    if (CPP_OPTION (pfile, trigraphs))
   2388  1.1  mrg 	      {
   2389  1.1  mrg 		accum.append (pfile, base, pos - base);
   2390  1.1  mrg 		base = pos;
   2391  1.1  mrg 		accum.read_begin (pfile);
   2392  1.1  mrg 		accum.append (pfile, UC"??", 2);
   2393  1.1  mrg 		accum.append (pfile, &type, 1);
   2394  1.1  mrg 
   2395  1.1  mrg 		/* ??/ followed by newline gets two line notes, one for
   2396  1.1  mrg 		   the trigraph and one for the backslash/newline.  */
   2397  1.1  mrg 		if (type == '/' && note[1].pos == pos)
   2398  1.1  mrg 		  {
   2399  1.1  mrg 		    note++;
   2400  1.1  mrg 		    gcc_assert (note->type == '\\' || note->type == ' ');
   2401  1.1  mrg 		    goto after_backslash;
   2402  1.1  mrg 		  }
   2403  1.1  mrg 		/* Skip the replacement character.  */
   2404  1.1  mrg 		base = ++pos;
   2405  1.1  mrg 	      }
   2406  1.1  mrg 
   2407  1.1  mrg 	    note++;
   2408  1.1  mrg 	    break;
   2409  1.1  mrg 	  }
   2410  1.1  mrg 
   2411  1.1  mrg       /* Now get a char to process.  Either from an expanded note, or
   2412  1.1  mrg 	 from the line buffer.  */
   2413  1.1  mrg       bool read_note = accum.reading_p ();
   2414  1.1  mrg       char c = read_note ? accum.read_char () : *pos++;
   2415  1.1  mrg 
   2416  1.1  mrg       if (phase == PHASE_PREFIX)
   2417  1.1  mrg 	{
   2418  1.1  mrg 	  if (c == '(')
   2419  1.1  mrg 	    {
   2420  1.1  mrg 	      /* Done.  */
   2421  1.1  mrg 	      phase = PHASE_NONE;
   2422  1.1  mrg 	      prefix[prefix_len++] = '"';  /* The sequence to match after a ')' is the delimiter then '"'.  */
   2423  1.1  mrg 	    }
   2424  1.1  mrg 	  else if (prefix_len < 16
   2425  1.1  mrg 		   /* Prefix chars are any of the basic character set,
   2426  1.1  mrg 		      [lex.charset] except for '
   2427  1.1  mrg 		      ()\\\t\v\f\n'. Optimized for a contiguous
   2428  1.1  mrg 		      alphabet.  */
   2429  1.1  mrg 		   /* Unlike a switch, this collapses down to one or
   2430  1.1  mrg 		      two shift and bitmask operations on an ASCII
   2431  1.1  mrg 		      system, with an outlier or two.   */
   2432  1.1  mrg 		   && (('Z' - 'A' == 25
   2433  1.1  mrg 			? ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
   2434  1.1  mrg 			: ISIDST (c))
   2435  1.1  mrg 		       || (c >= '0' && c <= '9')
   2436  1.1  mrg 		       || c == '_' || c == '{' || c == '}'
   2437  1.1  mrg 		       || c == '[' || c == ']' || c == '#'
   2438  1.1  mrg 		       || c == '<' || c == '>' || c == '%'
   2439  1.1  mrg 		       || c == ':' || c == ';' || c == '.' || c == '?'
   2440  1.1  mrg 		       || c == '*' || c == '+' || c == '-' || c == '/'
   2441  1.1  mrg 		       || c == '^' || c == '&' || c == '|' || c == '~'
   2442  1.1  mrg 		       || c == '!' || c == '=' || c == ','
   2443  1.1  mrg 		       || c == '"' || c == '\''))
   2444  1.1  mrg 	    prefix[prefix_len++] = c;
   2445  1.1  mrg 	  else
   2446  1.1  mrg 	    {
   2447  1.1  mrg 	      /* Something is wrong.  */
   2448  1.1  mrg 	      int col = CPP_BUF_COLUMN (pfile->buffer, pos) + read_note;
   2449  1.1  mrg 	      if (prefix_len == 16)
   2450  1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
   2451  1.1  mrg 				     col, "raw string delimiter longer "
   2452  1.1  mrg 				     "than 16 characters");
   2453  1.1  mrg 	      else if (c == '\n')
   2454  1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
   2455  1.1  mrg 				     col, "invalid new-line in raw "
   2456  1.1  mrg 				     "string delimiter");
   2457  1.1  mrg 	      else
   2458  1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
   2459  1.1  mrg 				     col, "invalid character '%c' in "
   2460  1.1  mrg 				     "raw string delimiter", c);
   2461  1.1  mrg 	      type = CPP_OTHER;
   2462  1.1  mrg 	      phase = PHASE_NONE;
   2463  1.1  mrg 	      /* Continue until we get a close quote, that's probably
   2464  1.1  mrg 		 the best failure mode.  */
   2465  1.1  mrg 	      prefix_len = 0;
   2466  1.1  mrg 	    }
   2467  1.1  mrg 	  if (c != '\n')
   2468  1.1  mrg 	    continue;
   2469  1.1  mrg 	}
   2470  1.1  mrg 
   2471  1.1  mrg       if (phase != PHASE_NONE)
   2472  1.1  mrg 	{
   2473  1.1  mrg 	  if (prefix[phase] != c)
   2474  1.1  mrg 	    phase = PHASE_NONE;
   2475  1.1  mrg 	  else if (unsigned (phase + 1) == prefix_len)
   2476  1.1  mrg 	    break;
   2477  1.1  mrg 	  else
   2478  1.1  mrg 	    {
   2479  1.1  mrg 	      phase = Phase (phase + 1);
   2480  1.1  mrg 	      continue;
   2481  1.1  mrg 	    }
   2482  1.1  mrg 	}
   2483  1.1  mrg 
   2484  1.1  mrg       if (!prefix_len && c == '"')
   2485  1.1  mrg 	/* Failure mode lexing.  */
   2486  1.1  mrg 	goto out;
   2487  1.1  mrg       else if (prefix_len && c == ')')
   2488  1.1  mrg 	phase = PHASE_SUFFIX;
   2489  1.1  mrg       else if (!read_note && c == '\n')
   2490  1.1  mrg 	{
   2491  1.1  mrg 	  pos--;
   2492  1.1  mrg 	  pfile->buffer->cur = pos;
   2493  1.1  mrg 	  if (pfile->state.in_directive
   2494  1.1  mrg 	      || (pfile->state.parsing_args
   2495  1.1  mrg 		  && pfile->buffer->next_line >= pfile->buffer->rlimit))
   2496  1.1  mrg 	    {
   2497  1.1  mrg 	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
   2498  1.1  mrg 				   "unterminated raw string");
   2499  1.1  mrg 	      type = CPP_OTHER;
   2500  1.1  mrg 	      goto out;
   2501  1.1  mrg 	    }
   2502  1.1  mrg 
   2503  1.1  mrg 	  accum.append (pfile, base, pos - base + 1);
   2504  1.1  mrg 	  _cpp_process_line_notes (pfile, false);
   2505  1.1  mrg 
   2506  1.1  mrg 	  if (pfile->buffer->next_line < pfile->buffer->rlimit)
   2507  1.1  mrg 	    CPP_INCREMENT_LINE (pfile, 0);
   2508  1.1  mrg 	  pfile->buffer->need_line = true;
   2509  1.1  mrg 
   2510  1.1  mrg 	  if (!_cpp_get_fresh_line (pfile))
   2511  1.1  mrg 	    {
   2512  1.1  mrg 	      /* We ran out of file and failed to get a line.  */
   2513  1.1  mrg 	      location_t src_loc = token->src_loc;
   2514  1.1  mrg 	      token->type = CPP_EOF;
   2515  1.1  mrg 	      /* Tell the compiler the line number of the EOF token.  */
   2516  1.1  mrg 	      token->src_loc = pfile->line_table->highest_line;
   2517  1.1  mrg 	      token->flags = BOL;
   2518  1.1  mrg 	      if (accum.first)
   2519  1.1  mrg 		_cpp_release_buff (pfile, accum.first);
   2520  1.1  mrg 	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
   2521  1.1  mrg 				   "unterminated raw string");
   2522  1.1  mrg 	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
   2523  1.1  mrg 	      _cpp_pop_buffer (pfile);
   2524  1.1  mrg 	      return;
   2525  1.1  mrg 	    }
   2526  1.1  mrg 
   2527  1.1  mrg 	  pos = base = pfile->buffer->cur;
   2528  1.1  mrg 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
   2529  1.1  mrg 	}
   2530  1.1  mrg       else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
   2531  1.1  mrg 	       && warn_bidi_p)
   2532  1.1  mrg 	{
   2533  1.1  mrg 	  location_t loc;
   2534  1.1  mrg 	  bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc);
   2535  1.1  mrg 	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
   2536  1.1  mrg 	}
   2537  1.1  mrg     }
   2538  1.1  mrg 
   2539  1.1  mrg   if (warn_bidi_p)
   2540  1.1  mrg     maybe_warn_bidi_on_close (pfile, pos);
   2541  1.1  mrg 
   2542  1.1  mrg   if (CPP_OPTION (pfile, user_literals))
   2543  1.1  mrg     {
   2544  1.1  mrg       /* If a string format macro, say from inttypes.h, is placed touching
   2545  1.1  mrg 	 a string literal it could be parsed as a C++11 user-defined string
   2546  1.1  mrg 	 literal thus breaking the program.  */
   2547  1.1  mrg       if (is_macro_not_literal_suffix (pfile, pos))
   2548  1.1  mrg 	{
   2549  1.1  mrg 	  /* Raise a warning, but do not consume subsequent tokens.  */
   2550  1.1  mrg 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
   2551  1.1  mrg 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
   2552  1.1  mrg 				   token->src_loc, 0,
   2553  1.1  mrg 				   "invalid suffix on literal; C++11 requires "
   2554  1.1  mrg 				   "a space between literal and string macro");
   2555  1.1  mrg 	}
   2556  1.1  mrg       /* Grab user defined literal suffix.  */
   2557  1.1  mrg       else if (ISIDST (*pos))
   2558  1.1  mrg 	{
   2559  1.1  mrg 	  type = cpp_userdef_string_add_type (type);
   2560  1.1  mrg 	  ++pos;
   2561  1.1  mrg 
   2562  1.1  mrg 	  while (ISIDNUM (*pos))
   2563  1.1  mrg 	    ++pos;
   2564  1.1  mrg 	}
   2565  1.1  mrg     }
   2566  1.1  mrg 
   2567  1.1  mrg  out:
   2568  1.1  mrg   pfile->buffer->cur = pos;
   2569  1.1  mrg   if (!accum.accum)
   2570  1.1  mrg     create_literal (pfile, token, base, pos - base, type);
   2571  1.1  mrg   else
   2572  1.1  mrg     {
   2573  1.1  mrg       size_t extra_len = pos - base;
   2574  1.1  mrg       uchar *dest = _cpp_unaligned_alloc (pfile, accum.accum + extra_len + 1);
   2575  1.1  mrg 
   2576  1.1  mrg       token->type = type;
   2577  1.1  mrg       token->val.str.len = accum.accum + extra_len;
   2578  1.1  mrg       token->val.str.text = dest;
   2579  1.1  mrg       for (_cpp_buff *buf = accum.first; buf; buf = buf->next)
   2580  1.1  mrg 	{
   2581  1.1  mrg 	  size_t len = BUFF_FRONT (buf) - buf->base;
   2582  1.1  mrg 	  memcpy (dest, buf->base, len);
   2583  1.1  mrg 	  dest += len;
   2584  1.1  mrg 	}
   2585  1.1  mrg       _cpp_release_buff (pfile, accum.first);
   2586  1.1  mrg       memcpy (dest, base, extra_len);
   2587  1.1  mrg       dest[extra_len] = '\0';
   2588  1.1  mrg     }
   2589  1.1  mrg }
   2590  1.1  mrg 
   2591  1.1  mrg /* Lexes a string, character constant, or angle-bracketed header file
   2592  1.1  mrg    name.  The stored string contains the spelling, including opening
   2593  1.1  mrg    quote and any leading 'L', 'u', 'U' or 'u8' and optional
   2594  1.1  mrg    'R' modifier.  TOKEN's type is set to the type of the literal, or
   2595  1.1  mrg    CPP_OTHER if it was not properly terminated, or CPP_LESS for an
   2596  1.1  mrg    unterminated header name which must be relexed as normal tokens.
   2597  1.1  mrg 
   2598  1.1  mrg    The spelling is NUL-terminated, but it is not guaranteed that this
   2599  1.1  mrg    is the first NUL since embedded NULs are preserved.  */
   2600  1.1  mrg static void
   2601  1.1  mrg lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
   2602  1.1  mrg {
   2603  1.1  mrg   bool saw_NUL = false;
   2604  1.1  mrg   const uchar *cur;
   2605  1.1  mrg   cppchar_t terminator;
   2606  1.1  mrg   enum cpp_ttype type;
   2607  1.1  mrg 
   2608  1.1  mrg   cur = base;
   2609  1.1  mrg   terminator = *cur++;
   2610  1.1  mrg   if (terminator == 'L' || terminator == 'U')
   2611  1.1  mrg     terminator = *cur++;
   2612  1.1  mrg   else if (terminator == 'u')
   2613  1.1  mrg     {
   2614  1.1  mrg       terminator = *cur++;
   2615  1.1  mrg       if (terminator == '8')
   2616  1.1  mrg 	terminator = *cur++;
   2617  1.1  mrg     }
   2618  1.1  mrg   if (terminator == 'R')
   2619  1.1  mrg     {
   2620  1.1  mrg       lex_raw_string (pfile, token, base);  /* Raw strings are handled entirely by lex_raw_string.  */
   2621  1.1  mrg       return;
   2622  1.1  mrg     }
   2623  1.1  mrg   if (terminator == '"')
   2624  1.1  mrg     type = (*base == 'L' ? CPP_WSTRING :
   2625  1.1  mrg 	    *base == 'U' ? CPP_STRING32 :
   2626  1.1  mrg 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
   2627  1.1  mrg 			 : CPP_STRING);
   2628  1.1  mrg   else if (terminator == '\'')
   2629  1.1  mrg     type = (*base == 'L' ? CPP_WCHAR :
   2630  1.1  mrg 	    *base == 'U' ? CPP_CHAR32 :
   2631  1.1  mrg 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
   2632  1.1  mrg 			 : CPP_CHAR);
   2633  1.1  mrg   else
   2634  1.1  mrg     terminator = '>', type = CPP_HEADER_NAME;
   2635  1.1  mrg 
   2636  1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
   2637  1.1  mrg   for (;;)
   2638  1.1  mrg     {
   2639  1.1  mrg       cppchar_t c = *cur++;
   2640  1.1  mrg 
   2641  1.1  mrg       /* In #include-style directives, terminators are not escapable.  */
   2642  1.1  mrg       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
   2643  1.1  mrg 	{
   2644  1.1  mrg 	  if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
   2645  1.1  mrg 	    {
   2646  1.1  mrg 	      location_t loc;
   2647  1.1  mrg 	      bidi::kind kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U',
   2648  1.1  mrg 					      &loc);
   2649  1.1  mrg 	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
   2650  1.1  mrg 	    }
   2651  1.1  mrg 	  cur++;  /* Step over the escaped character so it cannot act as the terminator.  */
   2652  1.1  mrg 	}
   2653  1.1  mrg       else if (c == terminator)
   2654  1.1  mrg 	{
   2655  1.1  mrg 	  if (warn_bidi_p)
   2656  1.1  mrg 	    maybe_warn_bidi_on_close (pfile, cur - 1);
   2657  1.1  mrg 	  break;
   2658  1.1  mrg 	}
   2659  1.1  mrg       else if (c == '\n')
   2660  1.1  mrg 	{
   2661  1.1  mrg 	  cur--;
   2662  1.1  mrg 	  /* Unmatched quotes always yield undefined behavior, but
   2663  1.1  mrg 	     greedy lexing means that what appears to be an unterminated
   2664  1.1  mrg 	     header name may actually be a legitimate sequence of tokens.  */
   2665  1.1  mrg 	  if (terminator == '>')
   2666  1.1  mrg 	    {
   2667  1.1  mrg 	      token->type = CPP_LESS;
   2668  1.1  mrg 	      return;
   2669  1.1  mrg 	    }
   2670  1.1  mrg 	  type = CPP_OTHER;
   2671  1.1  mrg 	  break;
   2672  1.1  mrg 	}
   2673  1.1  mrg       else if (c == '\0')
   2674  1.1  mrg 	saw_NUL = true;
   2675  1.1  mrg       else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
   2676  1.1  mrg 	{
   2677  1.1  mrg 	  location_t loc;
   2678  1.1  mrg 	  bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
   2679  1.1  mrg 	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
   2680  1.1  mrg 	}
   2681  1.1  mrg     }
   2682  1.1  mrg 
   2683  1.1  mrg   if (saw_NUL && !pfile->state.skipping)
   2684  1.1  mrg     cpp_error (pfile, CPP_DL_WARNING,
   2685  1.1  mrg 	       "null character(s) preserved in literal");
   2686  1.1  mrg 
   2687  1.1  mrg   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
   2688  1.1  mrg     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
   2689  1.1  mrg 	       (int) terminator);
   2690  1.1  mrg 
   2691  1.1  mrg   if (CPP_OPTION (pfile, user_literals))
   2692  1.1  mrg     {
   2693  1.1  mrg       /* If a string format macro, say from inttypes.h, is placed touching
   2694  1.1  mrg 	 a string literal it could be parsed as a C++11 user-defined string
   2695  1.1  mrg 	 literal thus breaking the program.  */
   2696  1.1  mrg       if (is_macro_not_literal_suffix (pfile, cur))
   2697  1.1  mrg 	{
   2698  1.1  mrg 	  /* Raise a warning, but do not consume subsequent tokens.  */
   2699  1.1  mrg 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
   2700  1.1  mrg 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
   2701  1.1  mrg 				   token->src_loc, 0,
   2702  1.1  mrg 				   "invalid suffix on literal; C++11 requires "
   2703  1.1  mrg 				   "a space between literal and string macro");
   2704  1.1  mrg 	}
   2705  1.1  mrg       /* Grab user defined literal suffix.  */
   2706  1.1  mrg       else if (ISIDST (*cur))
   2707  1.1  mrg 	{
   2708  1.1  mrg 	  type = cpp_userdef_char_add_type (type);  /* NOTE(review): both conversions run unconditionally; presumably each leaves a non-matching TYPE unchanged -- confirm.  */
   2709  1.1  mrg 	  type = cpp_userdef_string_add_type (type);
   2710  1.1  mrg           ++cur;
   2711  1.1  mrg 
   2712  1.1  mrg 	  while (ISIDNUM (*cur))
   2713  1.1  mrg 	    ++cur;
   2714  1.1  mrg 	}
   2715  1.1  mrg     }
   2716  1.1  mrg   else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
   2717  1.1  mrg 	   && is_macro (pfile, cur)
   2718  1.1  mrg 	   && !pfile->state.skipping)
   2719  1.1  mrg     cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
   2720  1.1  mrg 			   token->src_loc, 0, "C++11 requires a space "
   2721  1.1  mrg 			   "between string literal and macro");
   2722  1.1  mrg 
   2723  1.1  mrg   pfile->buffer->cur = cur;
   2724  1.1  mrg   create_literal (pfile, token, base, cur - base, type);
   2725  1.1  mrg }
   2726  1.1  mrg 
   2727  1.1  mrg /* Return a pointer to PFILE's comment table.  The client may not make
   2728  1.1  mrg    any assumption about the ordering of the table.  */
   2729  1.1  mrg cpp_comment_table *
   2730  1.1  mrg cpp_get_comments (cpp_reader *pfile)
   2731  1.1  mrg {
   2732  1.1  mrg   return &pfile->comments;
   2733  1.1  mrg }
   2734  1.1  mrg 
   2735  1.1  mrg /* Append a copy of TOKEN's comment text and its source location to the end of PFILE's comment table, growing the table as needed.  */
   2736  1.1  mrg static void
   2737  1.1  mrg store_comment (cpp_reader *pfile, cpp_token *token)
   2738  1.1  mrg {
   2739  1.1  mrg   int len;
   2740  1.1  mrg 
   2741  1.1  mrg   if (pfile->comments.allocated == 0)  /* First use: start with room for 256 entries.  */
   2742  1.1  mrg     {
   2743  1.1  mrg       pfile->comments.allocated = 256;
   2744  1.1  mrg       pfile->comments.entries = (cpp_comment *) xmalloc
   2745  1.1  mrg 	(pfile->comments.allocated * sizeof (cpp_comment));
   2746  1.1  mrg     }
   2747  1.1  mrg 
   2748  1.1  mrg   if (pfile->comments.count == pfile->comments.allocated)  /* Table full: double its capacity.  */
   2749  1.1  mrg     {
   2750  1.1  mrg       pfile->comments.allocated *= 2;
   2751  1.1  mrg       pfile->comments.entries = (cpp_comment *) xrealloc
   2752  1.1  mrg 	(pfile->comments.entries,
   2753  1.1  mrg 	 pfile->comments.allocated * sizeof (cpp_comment));
   2754  1.1  mrg     }
   2755  1.1  mrg 
   2756  1.1  mrg   len = token->val.str.len;
   2757  1.1  mrg 
   2758  1.1  mrg   /* Copy comment.  Note, token may not be NUL-terminated, so terminate the copy explicitly.  */
   2759  1.1  mrg   pfile->comments.entries[pfile->comments.count].comment =
   2760  1.1  mrg     (char *) xmalloc (sizeof (char) * (len + 1));
   2761  1.1  mrg   memcpy (pfile->comments.entries[pfile->comments.count].comment,
   2762  1.1  mrg 	  token->val.str.text, len);
   2763  1.1  mrg   pfile->comments.entries[pfile->comments.count].comment[len] = '\0';
   2764  1.1  mrg 
   2765  1.1  mrg   /* Set source location. */
   2766  1.1  mrg   pfile->comments.entries[pfile->comments.count].sloc = token->src_loc;
   2767  1.1  mrg 
   2768  1.1  mrg   /* Increment the count of entries in the comment table. */
   2769  1.1  mrg   pfile->comments.count++;
   2770  1.1  mrg }
   2771  1.1  mrg 
   2772  1.1  mrg /* The stored comment includes the comment start and any terminator.  */
   2773  1.1  mrg static void
   2774  1.1  mrg save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
   2775  1.1  mrg 	      cppchar_t type)
   2776  1.1  mrg {
   2777  1.1  mrg   unsigned char *buffer;
   2778  1.2  mrg   unsigned int len, clen, i;
   2779  1.2  mrg   int convert_to_c = (pfile->state.in_directive || pfile->state.parsing_args)
   2780  1.1  mrg     && type == '/';
   2781  1.1  mrg 
   2782  1.1  mrg   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
   2783  1.1  mrg 
   2784  1.1  mrg   /* C++ comments probably (not definitely) have moved past a new
   2785  1.1  mrg      line, which we don't want to save in the comment.  */
   2786  1.1  mrg   if (is_vspace (pfile->buffer->cur[-1]))
   2787  1.1  mrg     len--;
   2788  1.1  mrg 
   2789  1.1  mrg   /* If we are currently in a directive or in argument parsing, then
   2790  1.1  mrg      we need to store all C++ comments as C comments internally, and
   2791  1.1  mrg      so we need to allocate a little extra space in that case.
   2792  1.1  mrg 
   2793  1.1  mrg      Note that the only time we encounter a directive here is
   2794  1.2  mrg      when we are saving comments in a "#define".  */
   2795  1.1  mrg   clen = convert_to_c ? len + 2 : len;
   2796  1.1  mrg 
   2797  1.1  mrg   buffer = _cpp_unaligned_alloc (pfile, clen);
   2798  1.1  mrg 
   2799  1.1  mrg   token->type = CPP_COMMENT;
   2800  1.1  mrg   token->val.str.len = clen;
   2801  1.1  mrg   token->val.str.text = buffer;
   2802  1.1  mrg 
   2803  1.1  mrg   buffer[0] = '/';
   2804  1.1  mrg   memcpy (buffer + 1, from, len - 1);
   2805  1.1  mrg 
   2806  1.2  mrg   /* Finish conversion to a C comment, if necessary.  */
   2807  1.1  mrg   if (convert_to_c)
   2808  1.1  mrg     {
   2809  1.1  mrg       buffer[1] = '*';
   2810  1.1  mrg       buffer[clen - 2] = '*';
   2811  1.1  mrg       buffer[clen - 1] = '/';
   2812  1.1  mrg       /* As there can be in a C++ comments illegal sequences for C comments
   2813  1.1  mrg          we need to filter them out.  */
   2814  1.1  mrg       for (i = 2; i < (clen - 2); i++)
   2815  1.1  mrg         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
   2816  1.1  mrg           buffer[i] = '|';
   2817  1.1  mrg     }
   2818  1.1  mrg 
   2819  1.1  mrg   /* Finally store this comment for use by clients of libcpp. */
   2820  1.1  mrg   store_comment (pfile, token);
   2821  1.1  mrg }
   2822  1.1  mrg 
   2823  1.1  mrg /* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
   2824  1.1  mrg    comment.  */
                  /* PFILE supplies the -Wimplicit-fallthrough level and, through
                     pfile->buffer->cur, the upper bound used by the length checks.
                     COMMENT_START is tested for '*' to tell a C block comment from a
                     C++ line comment; matching begins at the character after it.

                     Level 0 and any level not listed in the switch accept nothing,
                     level 1 accepts every comment, and level 2 searches the comment
                     for a loose, case-insensitive "falls through" spelling.  Levels 3
                     and 4 require one of the listed patterns to be the whole comment:
                     the matched text must be followed directly by the block comment
                     terminator or, for a line comment, by a newline.  */
   2825  1.1  mrg 
   2826  1.1  mrg static bool
   2827  1.1  mrg fallthrough_comment_p (cpp_reader *pfile, const unsigned char *comment_start)
   2828  1.1  mrg {
   2829  1.1  mrg   const unsigned char *from = comment_start + 1;
   2830  1.1  mrg 
   2831  1.1  mrg   switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
   2832  1.1  mrg     {
   2833  1.1  mrg       /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
   2834  1.1  mrg 	 don't recognize any comments.  The latter only checks attributes,
   2835  1.1  mrg 	 the former doesn't warn.  */
   2836  1.1  mrg     case 0:
   2837  1.1  mrg     default:
   2838  1.1  mrg       return false;
   2839  1.1  mrg       /* -Wimplicit-fallthrough=1 considers any comment, no matter what
   2840  1.1  mrg 	 content it has.  */
   2841  1.1  mrg     case 1:
   2842  1.1  mrg       return true;
   2843  1.1  mrg     case 2:
   2844  1.1  mrg       /* -Wimplicit-fallthrough=2 looks for (case insensitive)
   2845  1.1  mrg 	 .*falls?[ \t-]*thr(u|ough).* regex.  */
                        /* Keep scanning only while at least strlen ("fallthru") bytes
                           remain before buffer->cur.  A failed partial match resumes the
                           search from wherever FROM has been advanced to.  */
   2846  1.1  mrg       for (; (size_t) (pfile->buffer->cur - from) >= sizeof "fallthru" - 1;
   2847  1.1  mrg 	   from++)
   2848  1.1  mrg 	{
   2849  1.1  mrg 	  /* Is there anything like strpbrk with upper boundary, or
   2850  1.1  mrg 	     memchr looking for 2 characters rather than just one?  */
   2851  1.1  mrg 	  if (from[0] != 'f' && from[0] != 'F')
   2852  1.1  mrg 	    continue;
   2853  1.1  mrg 	  if (from[1] != 'a' && from[1] != 'A')
   2854  1.1  mrg 	    continue;
   2855  1.1  mrg 	  if (from[2] != 'l' && from[2] != 'L')
   2856  1.1  mrg 	    continue;
   2857  1.1  mrg 	  if (from[3] != 'l' && from[3] != 'L')
   2858  1.1  mrg 	    continue;
   2859  1.1  mrg 	  from += sizeof "fall" - 1;
   2860  1.1  mrg 	  if (from[0] == 's' || from[0] == 'S')
   2861  1.1  mrg 	    from++;
   2862  1.1  mrg 	  while (*from == ' ' || *from == '\t' || *from == '-')
   2863  1.1  mrg 	    from++;
   2864  1.1  mrg 	  if (from[0] != 't' && from[0] != 'T')
   2865  1.1  mrg 	    continue;
   2866  1.1  mrg 	  if (from[1] != 'h' && from[1] != 'H')
   2867  1.1  mrg 	    continue;
   2868  1.1  mrg 	  if (from[2] != 'r' && from[2] != 'R')
   2869  1.1  mrg 	    continue;
   2870  1.1  mrg 	  if (from[3] == 'u' || from[3] == 'U')
   2871  1.1  mrg 	    return true;
   2872  1.1  mrg 	  if (from[3] != 'o' && from[3] != 'O')
   2873  1.1  mrg 	    continue;
   2874  1.1  mrg 	  if (from[4] != 'u' && from[4] != 'U')
   2875  1.1  mrg 	    continue;
   2876  1.1  mrg 	  if (from[5] != 'g' && from[5] != 'G')
   2877  1.1  mrg 	    continue;
   2878  1.1  mrg 	  if (from[6] != 'h' && from[6] != 'H')
   2879  1.1  mrg 	    continue;
   2880  1.1  mrg 	  return true;
   2881  1.1  mrg 	}
   2882  1.1  mrg       return false;
                      /* Levels 3 and 4 share the whole-comment patterns handled after
                         the switch.  */
   2883  1.1  mrg     case 3:
   2884  1.1  mrg     case 4:
   2885  1.1  mrg       break;
   2886  1.1  mrg     }
   2887  1.1  mrg 
   2888  1.1  mrg   /* Whole comment contents:
   2889  1.1  mrg      -fallthrough
   2890  1.1  mrg      @fallthrough@
   2891  1.1  mrg    */
   2892  1.1  mrg   if (*from == '-' || *from == '@')
   2893  1.1  mrg     {
   2894  1.1  mrg       size_t len = sizeof "fallthrough" - 1;
   2895  1.1  mrg       if ((size_t) (pfile->buffer->cur - from - 1) < len)
   2896  1.1  mrg 	return false;
   2897  1.1  mrg       if (memcmp (from + 1, "fallthrough", len))
   2898  1.1  mrg 	return false;
   2899  1.1  mrg       if (*from == '@')
   2900  1.1  mrg 	{
   2901  1.1  mrg 	  if (from[len + 1] != '@')
   2902  1.1  mrg 	    return false;
   2903  1.1  mrg 	  len++;
   2904  1.1  mrg 	}
   2905  1.1  mrg       from += 1 + len;
   2906  1.1  mrg     }
   2907  1.1  mrg   /* Whole comment contents (regex):
   2908  1.1  mrg      lint -fallthrough[ \t]*
   2909  1.1  mrg    */
   2910  1.1  mrg   else if (*from == 'l')
   2911  1.1  mrg     {
   2912  1.1  mrg       size_t len = sizeof "int -fallthrough" - 1;
   2913  1.1  mrg       if ((size_t) (pfile->buffer->cur - from - 1) < len)
   2914  1.1  mrg 	return false;
   2915  1.1  mrg       if (memcmp (from + 1, "int -fallthrough", len))
   2916  1.1  mrg 	return false;
   2917  1.1  mrg       from += 1 + len;
   2918  1.1  mrg       while (*from == ' ' || *from == '\t')
   2919  1.1  mrg 	from++;
   2920  1.1  mrg     }
   2921  1.1  mrg   /* Whole comment contents (regex):
   2922  1.1  mrg      [ \t]*FALLTHR(U|OUGH)[ \t]*
   2923  1.1  mrg    */
   2924  1.1  mrg   else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == 4)
   2925  1.1  mrg     {
   2926  1.1  mrg       while (*from == ' ' || *from == '\t')
   2927  1.1  mrg 	from++;
   2928  1.1  mrg       if ((size_t) (pfile->buffer->cur - from)  < sizeof "FALLTHRU" - 1)
   2929  1.1  mrg 	return false;
   2930  1.1  mrg       if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
   2931  1.1  mrg 	return false;
   2932  1.1  mrg       from += sizeof "FALLTHR" - 1;
   2933  1.1  mrg       if (*from == 'U')
   2934  1.1  mrg 	from++;
   2935  1.1  mrg       else if ((size_t) (pfile->buffer->cur - from)  < sizeof "OUGH" - 1)
   2936  1.1  mrg 	return false;
   2937  1.1  mrg       else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
   2938  1.1  mrg 	return false;
   2939  1.1  mrg       else
   2940  1.1  mrg 	from += sizeof "OUGH" - 1;
   2941  1.1  mrg       while (*from == ' ' || *from == '\t')
   2942  1.1  mrg 	from++;
   2943  1.1  mrg     }
   2944  1.1  mrg   /* Whole comment contents (regex):
   2945  1.1  mrg      [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
   2946  1.1  mrg      [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
   2947  1.1  mrg      [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
   2948  1.1  mrg    */
   2949  1.1  mrg   else
   2950  1.1  mrg     {
   2951  1.1  mrg       while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
   2952  1.1  mrg 	from++;
   2953  1.1  mrg       unsigned char f = *from;
   2954  1.1  mrg       bool all_upper = false;
   2955  1.1  mrg       if (f == 'E' || f == 'e')
   2956  1.1  mrg 	{
   2957  1.1  mrg 	  if ((size_t) (pfile->buffer->cur - from)
   2958  1.1  mrg 	      < sizeof "else fallthru" - 1)
   2959  1.1  mrg 	    return false;
   2960  1.1  mrg 	  if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
   2961  1.1  mrg 	    all_upper = true;
   2962  1.1  mrg 	  else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
   2963  1.1  mrg 	    return false;
   2964  1.1  mrg 	  from += sizeof "else" - 1;
   2965  1.1  mrg 	  if (*from == ',')
   2966  1.1  mrg 	    from++;
   2967  1.1  mrg 	  if (*from != ' ')
   2968  1.1  mrg 	    return false;
   2969  1.1  mrg 	  from++;
   2970  1.1  mrg 	  if (all_upper && *from == 'f')
   2971  1.1  mrg 	    return false;
   2972  1.1  mrg 	  if (f == 'e' && *from == 'F')
   2973  1.1  mrg 	    return false;
   2974  1.1  mrg 	  f = *from;
   2975  1.1  mrg 	}
   2976  1.1  mrg       else if (f == 'I' || f == 'i')
   2977  1.1  mrg 	{
   2978  1.1  mrg 	  if ((size_t) (pfile->buffer->cur - from)
   2979  1.1  mrg 	      < sizeof "intentional fallthru" - 1)
   2980  1.1  mrg 	    return false;
   2981  1.1  mrg 	  if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
   2982  1.1  mrg 				  sizeof "NTENTIONAL" - 1) == 0)
   2983  1.1  mrg 	    all_upper = true;
   2984  1.1  mrg 	  else if (memcmp (from + 1, "ntentional",
   2985  1.1  mrg 			   sizeof "ntentional" - 1))
   2986  1.1  mrg 	    return false;
   2987  1.1  mrg 	  from += sizeof "intentional" - 1;
   2988  1.1  mrg 	  if (*from == ' ')
   2989  1.1  mrg 	    {
   2990  1.1  mrg 	      from++;
   2991  1.1  mrg 	      if (all_upper && *from == 'f')
   2992  1.1  mrg 		return false;
   2993  1.1  mrg 	    }
   2994  1.1  mrg 	  else if (all_upper)
   2995  1.1  mrg 	    {
   2996  1.1  mrg 	      if (memcmp (from, "LY F", sizeof "LY F" - 1))
   2997  1.1  mrg 		return false;
   2998  1.1  mrg 	      from += sizeof "LY " - 1;
   2999  1.1  mrg 	    }
   3000  1.1  mrg 	  else
   3001  1.1  mrg 	    {
   3002  1.1  mrg 	      if (memcmp (from, "ly ", sizeof "ly " - 1))
   3003  1.1  mrg 		return false;
   3004  1.1  mrg 	      from += sizeof "ly " - 1;
   3005  1.1  mrg 	    }
   3006  1.1  mrg 	  if (f == 'i' && *from == 'F')
   3007  1.1  mrg 	    return false;
   3008  1.1  mrg 	  f = *from;
   3009  1.1  mrg 	}
                        /* With any "else"/"intentional(ly)" prefix consumed, F is the
                           character that has to begin the fall-through word.  */
   3010  1.1  mrg       if (f != 'F' && f != 'f')
   3011  1.1  mrg 	return false;
   3012  1.1  mrg       if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
   3013  1.1  mrg 	return false;
   3014  1.1  mrg       if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
   3015  1.1  mrg 	all_upper = true;
   3016  1.1  mrg       else if (all_upper)
   3017  1.1  mrg 	return false;
   3018  1.1  mrg       else if (memcmp (from + 1, "all", sizeof "all" - 1))
   3019  1.1  mrg 	return false;
   3020  1.1  mrg       from += sizeof "fall" - 1;
   3021  1.1  mrg       if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
   3022  1.1  mrg 	from += 2;
   3023  1.1  mrg       else if (*from == ' ' || *from == '-')
   3024  1.1  mrg 	from++;
   3025  1.1  mrg       else if (*from != (all_upper ? 'T' : 't'))
   3026  1.1  mrg 	return false;
                        /* A capital 'T' is accepted only when the word began with 'F';
                           a lower-case 't' only when the match is not all upper case.  */
   3027  1.1  mrg       if ((f == 'f' || *from != 'T') && (all_upper || *from != 't'))
   3028  1.1  mrg 	return false;
   3029  1.1  mrg       if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
   3030  1.1  mrg 	return false;
   3031  1.1  mrg       if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
   3032  1.1  mrg 	{
   3033  1.1  mrg 	  if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
   3034  1.1  mrg 	    return false;
   3035  1.1  mrg 	  if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
   3036  1.1  mrg 		      sizeof "hrough" - 1))
   3037  1.1  mrg 	    return false;
   3038  1.1  mrg 	  from += sizeof "through" - 1;
   3039  1.1  mrg 	}
   3040  1.1  mrg       else
   3041  1.1  mrg 	from += sizeof "thru" - 1;
   3042  1.1  mrg       while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
   3043  1.1  mrg 	from++;
                        /* Optional trailer: a '-' followed by free text that runs to the
                           end of the line or, in a block comment, to its terminator.  */
   3044  1.1  mrg       if (*from == '-')
   3045  1.1  mrg 	{
   3046  1.1  mrg 	  from++;
   3047  1.1  mrg 	  if (*comment_start == '*')
   3048  1.1  mrg 	    {
   3049  1.1  mrg 	      do
   3050  1.1  mrg 		{
   3051  1.1  mrg 		  while (*from && *from != '*'
   3052  1.1  mrg 			 && *from != '\n' && *from != '\r')
   3053  1.1  mrg 		    from++;
   3054  1.1  mrg 		  if (*from != '*' || from[1] == '/')
   3055  1.1  mrg 		    break;
   3056  1.1  mrg 		  from++;
   3057  1.1  mrg 		}
   3058  1.1  mrg 	      while (1);
   3059  1.1  mrg 	    }
   3060  1.1  mrg 	  else
   3061  1.1  mrg 	    while (*from && *from != '\n' && *from != '\r')
   3062  1.1  mrg 	      from++;
   3063  1.1  mrg 	}
   3064  1.1  mrg     }
                    /* Whichever pattern matched, FROM must now sit on the comment
                       terminator; anything else means extra text and no match.  */
   3065  1.1  mrg   /* C block comment.  */
   3066  1.1  mrg   if (*comment_start == '*')
   3067  1.1  mrg     {
   3068  1.1  mrg       if (*from != '*' || from[1] != '/')
   3069  1.1  mrg 	return false;
   3070  1.1  mrg     }
   3071  1.1  mrg   /* C++ line comment.  */
   3072  1.1  mrg   else if (*from != '\n')
   3073  1.1  mrg     return false;
   3074  1.1  mrg 
   3075  1.1  mrg   return true;
   3076  1.1  mrg }
   3077  1.1  mrg 
   3078  1.1  mrg /* Allocate COUNT tokens for RUN.  */
   3079  1.1  mrg void
   3080  1.1  mrg _cpp_init_tokenrun (tokenrun *run, unsigned int count)
   3081  1.1  mrg {
   3082  1.1  mrg   run->base = XNEWVEC (cpp_token, count);
   3083  1.1  mrg   run->limit = run->base + count;
   3084  1.1  mrg   run->next = NULL;
   3085  1.1  mrg }
   3086  1.1  mrg 
   3087  1.1  mrg /* Returns the next tokenrun, or creates one if there is none.  */
   3088  1.1  mrg static tokenrun *
   3089  1.1  mrg next_tokenrun (tokenrun *run)
   3090  1.1  mrg {
   3091  1.1  mrg   if (run->next == NULL)
   3092  1.1  mrg     {
   3093  1.1  mrg       run->next = XNEW (tokenrun);
   3094  1.1  mrg       run->next->prev = run;
   3095  1.1  mrg       _cpp_init_tokenrun (run->next, 250);
   3096  1.1  mrg     }
   3097  1.1  mrg 
   3098  1.1  mrg   return run->next;
   3099  1.1  mrg }
   3100  1.1  mrg 
   3101  1.1  mrg /* Return the number of not yet processed token in a given
   3102  1.1  mrg    context.  */
   3103  1.1  mrg int
   3104  1.1  mrg _cpp_remaining_tokens_num_in_context (cpp_context *context)
   3105  1.1  mrg {
   3106  1.1  mrg   if (context->tokens_kind == TOKENS_KIND_DIRECT)
   3107  1.1  mrg     return (LAST (context).token - FIRST (context).token);
   3108  1.1  mrg   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
   3109  1.1  mrg 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
   3110  1.1  mrg     return (LAST (context).ptoken - FIRST (context).ptoken);
   3111  1.1  mrg   else
   3112  1.1  mrg       abort ();
   3113  1.1  mrg }
   3114  1.1  mrg 
   3115  1.1  mrg /* Returns the token present at index INDEX in a given context.  If
   3116  1.1  mrg    INDEX is zero, the next token to be processed is returned.  */
   3117  1.1  mrg static const cpp_token*
   3118  1.1  mrg _cpp_token_from_context_at (cpp_context *context, int index)
   3119  1.1  mrg {
   3120  1.1  mrg   if (context->tokens_kind == TOKENS_KIND_DIRECT)
   3121  1.1  mrg     return &(FIRST (context).token[index]);
   3122  1.1  mrg   else if (context->tokens_kind == TOKENS_KIND_INDIRECT
   3123  1.1  mrg 	   || context->tokens_kind == TOKENS_KIND_EXTENDED)
   3124  1.1  mrg     return FIRST (context).ptoken[index];
   3125  1.1  mrg  else
   3126  1.1  mrg    abort ();
   3127  1.1  mrg }
   3128  1.1  mrg 
   3129  1.1  mrg /* Look ahead in the input stream.  */
   3130  1.1  mrg const cpp_token *
   3131  1.1  mrg cpp_peek_token (cpp_reader *pfile, int index)
   3132  1.1  mrg {
   3133  1.1  mrg   cpp_context *context = pfile->context;
   3134  1.1  mrg   const cpp_token *peektok;
   3135  1.1  mrg   int count;
   3136  1.1  mrg 
   3137  1.1  mrg   /* First, scan through any pending cpp_context objects.  */
   3138  1.1  mrg   while (context->prev)
   3139  1.1  mrg     {
   3140  1.1  mrg       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
   3141  1.1  mrg 
   3142  1.1  mrg       if (index < (int) sz)
   3143  1.1  mrg         return _cpp_token_from_context_at (context, index);
   3144  1.1  mrg       index -= (int) sz;
   3145  1.1  mrg       context = context->prev;
   3146  1.1  mrg     }
   3147  1.1  mrg 
   3148  1.1  mrg   /* We will have to read some new tokens after all (and do so
   3149  1.1  mrg      without invalidating preceding tokens).  */
   3150  1.1  mrg   count = index;
   3151  1.1  mrg   pfile->keep_tokens++;
   3152  1.1  mrg 
   3153  1.1  mrg   /* For peeked tokens temporarily disable line_change reporting,
   3154  1.1  mrg      until the tokens are parsed for real.  */
   3155  1.1  mrg   void (*line_change) (cpp_reader *, const cpp_token *, int)
   3156  1.1  mrg     = pfile->cb.line_change;
   3157  1.1  mrg   pfile->cb.line_change = NULL;
   3158  1.1  mrg 
   3159  1.1  mrg   do
   3160  1.1  mrg     {
   3161  1.1  mrg       peektok = _cpp_lex_token (pfile);
   3162  1.1  mrg       if (peektok->type == CPP_EOF)
   3163  1.1  mrg 	{
   3164  1.1  mrg 	  index--;
   3165  1.1  mrg 	  break;
   3166  1.1  mrg 	}
   3167  1.1  mrg       else if (peektok->type == CPP_PRAGMA)
   3168  1.1  mrg 	{
   3169  1.1  mrg 	  /* Don't peek past a pragma.  */
   3170  1.1  mrg 	  if (peektok == &pfile->directive_result)
   3171  1.1  mrg 	    /* Save the pragma in the buffer.  */
   3172  1.1  mrg 	    *pfile->cur_token++ = *peektok;
   3173  1.1  mrg 	  index--;
   3174  1.1  mrg 	  break;
   3175  1.1  mrg 	}
   3176  1.1  mrg     }
   3177  1.1  mrg   while (index--);
   3178  1.1  mrg 
   3179  1.1  mrg   _cpp_backup_tokens_direct (pfile, count - index);
   3180  1.1  mrg   pfile->keep_tokens--;
   3181  1.1  mrg   pfile->cb.line_change = line_change;
   3182  1.1  mrg 
   3183  1.1  mrg   return peektok;
   3184  1.1  mrg }
   3185  1.1  mrg 
   3186  1.1  mrg /* Allocate a single token that is invalidated at the same time as the
   3187  1.1  mrg    rest of the tokens on the line.  Has its line and col set to the
   3188  1.1  mrg    same as the last lexed token, so that diagnostics appear in the
   3189  1.1  mrg    right place.  */
   3190  1.1  mrg cpp_token *
   3191  1.1  mrg _cpp_temp_token (cpp_reader *pfile)
   3192  1.1  mrg {
   3193  1.1  mrg   cpp_token *old, *result;
                    /* SZ is the number of token slots from cur_token to the end of the
                       current run; LA is the count of pending lookahead tokens.  */
   3194  1.1  mrg   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
   3195  1.1  mrg   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
   3196  1.1  mrg 
                    /* Remember the token just before cur_token; its location is copied
                       into the new token at the end.  */
   3197  1.1  mrg   old = pfile->cur_token - 1;
   3198  1.1  mrg   /* Any pre-existing lookaheads must not be clobbered.  */
   3199  1.1  mrg   if (la)
   3200  1.1  mrg     {
                        /* The lookaheads reach (or run past) the end of this run: shift
                           those already in the next run up by one slot and move this
                           run's last token to the front of the next run.  */
   3201  1.1  mrg       if (sz <= la)
   3202  1.1  mrg         {
   3203  1.1  mrg           tokenrun *next = next_tokenrun (pfile->cur_run);
   3204  1.1  mrg 
   3205  1.1  mrg           if (sz < la)
   3206  1.1  mrg             memmove (next->base + 1, next->base,
   3207  1.1  mrg                      (la - sz) * sizeof (cpp_token));
   3208  1.1  mrg 
   3209  1.1  mrg           next->base[0] = pfile->cur_run->limit[-1];
   3210  1.1  mrg         }
   3211  1.1  mrg 
                        /* Slide the lookaheads that stay in this run up by one slot,
                           which frees the slot at cur_token.  */
   3212  1.1  mrg       if (sz > 1)
   3213  1.1  mrg         memmove (pfile->cur_token + 1, pfile->cur_token,
   3214  1.1  mrg                  MIN (la, sz - 1) * sizeof (cpp_token));
   3215  1.1  mrg     }
   3216  1.1  mrg 
                    /* No slot is left in this run: continue at the start of the next.  */
   3217  1.1  mrg   if (!sz && pfile->cur_token == pfile->cur_run->limit)
   3218  1.1  mrg     {
   3219  1.1  mrg       pfile->cur_run = next_tokenrun (pfile->cur_run);
   3220  1.1  mrg       pfile->cur_token = pfile->cur_run->base;
   3221  1.1  mrg     }
   3222  1.1  mrg 
                    /* Hand out the slot at cur_token and give it OLD's location.  */
   3223  1.1  mrg   result = pfile->cur_token++;
   3224  1.1  mrg   result->src_loc = old->src_loc;
   3225  1.1  mrg   return result;
   3226  1.1  mrg }
   3227  1.1  mrg 
   3228  1.1  mrg /* We're at the beginning of a logical line (so not in
   3229  1.1  mrg   directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set.  See
   3230  1.1  mrg   if we should enter deferred_pragma mode to tokenize the rest of the
   3231  1.1  mrg   line as a module control-line.  */
                  /* Up to two further tokens are peeked with _cpp_lex_direct.  On a
                     match the leading keyword tokens are flagged NO_EXPAND, remapped to
                     the [1] entries of n_modules, and the deferred-pragma state set here
                     is left on.  Otherwise save_comments, in_deferred_pragma and
                     angled_headers are put back.  Either way the peeked tokens are
                     handed back through _cpp_backup_tokens_direct.  */
   3232  1.1  mrg 
   3233  1.1  mrg static void
   3234  1.1  mrg cpp_maybe_module_directive (cpp_reader *pfile, cpp_token *result)
   3235  1.1  mrg {
   3236  1.1  mrg   unsigned backup = 0; /* Tokens we peeked.  */
   3237  1.1  mrg   cpp_hashnode *node = result->val.node.node;
   3238  1.1  mrg   cpp_token *peek = result;
   3239  1.1  mrg   cpp_token *keyword = peek;
   3240  1.1  mrg   cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][2] = pfile->spec_nodes.n_modules;
   3241  1.1  mrg   int header_count = 0;
   3242  1.1  mrg 
   3243  1.1  mrg   /* Make sure the incoming state is as we expect it.  This way we
   3244  1.1  mrg      can restore it using constants.  */
   3245  1.1  mrg   gcc_checking_assert (!pfile->state.in_deferred_pragma
   3246  1.1  mrg 		       && !pfile->state.skipping
   3247  1.1  mrg 		       && !pfile->state.parsing_args
   3248  1.1  mrg 		       && !pfile->state.angled_headers
   3249  1.1  mrg 		       && (pfile->state.save_comments
   3250  1.1  mrg 			   == !CPP_OPTION (pfile, discard_comments)));
   3251  1.1  mrg 
   3252  1.1  mrg   /* Enter directives mode sufficiently for peeking.  We don't have
   3253  1.1  mrg      to actually set in_directive.  */
   3254  1.1  mrg   pfile->state.in_deferred_pragma = true;
   3255  1.1  mrg 
   3256  1.1  mrg   /* These two fields are needed to process tokenization in deferred
   3257  1.1  mrg      pragma mode.  They are not used outside deferred pragma mode or
   3258  1.1  mrg      directives mode.  */
   3259  1.1  mrg   pfile->state.pragma_allow_expansion = true;
   3260  1.1  mrg   pfile->directive_line = result->src_loc;
   3261  1.1  mrg 
   3262  1.1  mrg   /* Saving comments is incompatible with directives mode.   */
   3263  1.1  mrg   pfile->state.save_comments = 0;
   3264  1.1  mrg 
                    /* A leading "export" must be followed by another NODE_MODULE name,
                       which then becomes the keyword examined below.  */
   3265  1.1  mrg   if (node == n_modules[spec_nodes::M_EXPORT][0])
   3266  1.1  mrg     {
   3267  1.1  mrg       peek = _cpp_lex_direct (pfile);
   3268  1.1  mrg       keyword = peek;
   3269  1.1  mrg       backup++;
   3270  1.1  mrg       if (keyword->type != CPP_NAME)
   3271  1.1  mrg 	goto not_module;
   3272  1.1  mrg       node = keyword->val.node.node;
   3273  1.1  mrg       if (!(node->flags & NODE_MODULE))
   3274  1.1  mrg 	goto not_module;
   3275  1.1  mrg     }
   3276  1.1  mrg 
                    /* header_count ends up non-zero exactly for the import forms and is
                       later stored in state.directive_file_token.
                       NOTE(review): the meaning of the 2 and 16 summands is defined by
                       the consumer of directive_file_token, which is not visible here --
                       confirm there.  */
   3277  1.1  mrg   if (node == n_modules[spec_nodes::M__IMPORT][0])
   3278  1.1  mrg     /* __import  */
   3279  1.1  mrg     header_count = backup + 2 + 16;
   3280  1.1  mrg   else if (node == n_modules[spec_nodes::M_IMPORT][0])
   3281  1.1  mrg     /* import  */
   3282  1.1  mrg     header_count = backup + 2 + (CPP_OPTION (pfile, preprocessed) ? 16 : 0);
   3283  1.1  mrg   else if (node == n_modules[spec_nodes::M_MODULE][0])
   3284  1.1  mrg     ; /* module  */
   3285  1.1  mrg   else
   3286  1.1  mrg     goto not_module;
   3287  1.1  mrg 
   3288  1.1  mrg   /* We've seen [export] {module|import|__import}.  Check the next token.  */
   3289  1.1  mrg   if (header_count)
   3290  1.1  mrg     /* After '{,__}import' a header name may appear.  */
   3291  1.1  mrg     pfile->state.angled_headers = true;
   3292  1.1  mrg   peek = _cpp_lex_direct (pfile);
   3293  1.1  mrg   backup++;
   3294  1.1  mrg 
   3295  1.1  mrg   /* ... import followed by identifier, ':', '<' or
   3296  1.1  mrg      header-name preprocessing tokens, or module
   3297  1.1  mrg      followed by cpp-identifier, ':' or ';' preprocessing
   3298  1.1  mrg      tokens.  C++ keywords are not yet relevant.  */
   3299  1.1  mrg   if (peek->type == CPP_NAME
   3300  1.1  mrg       || peek->type == CPP_COLON
   3301  1.1  mrg       ||  (header_count
   3302  1.1  mrg 	   ? (peek->type == CPP_LESS
   3303  1.1  mrg 	      || (peek->type == CPP_STRING && peek->val.str.text[0] != 'R')
   3304  1.1  mrg 	      || peek->type == CPP_HEADER_NAME)
   3305  1.1  mrg 	   : peek->type == CPP_SEMICOLON))
   3306  1.1  mrg     {
   3307  1.1  mrg       pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed);
   3308  1.1  mrg       if (!pfile->state.pragma_allow_expansion)
   3309  1.1  mrg 	pfile->state.prevent_expansion++;
   3310  1.1  mrg 
   3311  1.1  mrg       if (!header_count && linemap_included_from
   3312  1.1  mrg 	  (LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table)))
   3313  1.1  mrg 	cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, 0,
   3314  1.1  mrg 			     "module control-line cannot be in included file");
   3315  1.1  mrg 
   3316  1.1  mrg       /* The first one or two tokens cannot be macro names.  */
   3317  1.1  mrg       for (int ix = backup; ix--;)
   3318  1.1  mrg 	{
                            /* ix == 0 selects RESULT, the first token of the line;
                               ix == 1 selects the keyword that followed "export".  */
   3319  1.1  mrg 	  cpp_token *tok = ix ? keyword : result;
   3320  1.1  mrg 	  cpp_hashnode *node = tok->val.node.node;
   3321  1.1  mrg 
   3322  1.1  mrg 	  /* Don't attempt to expand the token.  */
   3323  1.1  mrg 	  tok->flags |= NO_EXPAND;
   3324  1.1  mrg 	  if (_cpp_defined_macro_p (node)
   3325  1.1  mrg 	      && _cpp_maybe_notify_macro_use (pfile, node, tok->src_loc)
   3326  1.1  mrg 	      && !cpp_fun_like_macro_p (node))
   3327  1.1  mrg 	    cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, 0,
   3328  1.1  mrg 				 "module control-line \"%s\" cannot be"
   3329  1.1  mrg 				 " an object-like macro",
   3330  1.1  mrg 				 NODE_NAME (node));
   3331  1.1  mrg 	}
   3332  1.1  mrg 
   3333  1.1  mrg       /* Map to underbar variants.  */
   3334  1.1  mrg       keyword->val.node.node = n_modules[header_count
   3335  1.1  mrg 					 ? spec_nodes::M_IMPORT
   3336  1.1  mrg 					 : spec_nodes::M_MODULE][1];
                        /* Two peeked tokens means RESULT was the "export" keyword.  */
   3337  1.1  mrg       if (backup != 1)
   3338  1.1  mrg 	result->val.node.node = n_modules[spec_nodes::M_EXPORT][1];
   3339  1.1  mrg 
   3340  1.1  mrg       /* Maybe tell the tokenizer we expect a header-name down the
   3341  1.1  mrg 	 road.  */
   3342  1.1  mrg       pfile->state.directive_file_token = header_count;
   3343  1.1  mrg     }
   3344  1.1  mrg   else
   3345  1.1  mrg     {
   3346  1.1  mrg     not_module:
   3347  1.1  mrg       /* Drop out of directive mode.  */
   3348  1.1  mrg       /* We asserted save_comments had this value upon entry.  */
   3349  1.1  mrg       pfile->state.save_comments
   3350  1.1  mrg 	= !CPP_OPTION (pfile, discard_comments);
   3351  1.1  mrg       pfile->state.in_deferred_pragma = false;
   3352  1.1  mrg       /* Do not let this remain on.  */
   3353  1.1  mrg       pfile->state.angled_headers = false;
   3354  1.1  mrg     }
   3355  1.1  mrg 
   3356  1.1  mrg   /* In either case we want to backup the peeked tokens.  */
   3357  1.1  mrg   if (backup)
   3358  1.1  mrg     {
   3359  1.1  mrg       /* If we saw EOL, we should drop it, because this isn't a module
   3360  1.1  mrg 	 control-line after all.  */
   3361  1.1  mrg       bool eol = peek->type == CPP_PRAGMA_EOL;
   3362  1.1  mrg       if (!eol || backup > 1)
   3363  1.1  mrg 	{
   3364  1.1  mrg 	  /* Put the peeked tokens back.  */
   3365  1.1  mrg 	  _cpp_backup_tokens_direct (pfile, backup);
   3366  1.1  mrg 	  /* But if the last one was an EOL, forget it.  */
   3367  1.1  mrg 	  if (eol)
   3368  1.1  mrg 	    pfile->lookaheads--;
   3369  1.1  mrg 	}
   3370  1.1  mrg     }
   3371  1.1  mrg }
   3372  1.1  mrg 
   3373  1.1  mrg /* Lex a token into RESULT (external interface).  Takes care of issues
   3374  1.1  mrg    like directive handling, token lookahead, multiple include
   3375  1.1  mrg    optimization and skipping.  */
                  /* Returns the token to hand to the caller.  The loop runs again when a
                     handled directive leaves a CPP_PADDING result and, outside
                     directives, while pfile->state.skipping is set and the token is not
                     CPP_EOF.  */
   3376  1.1  mrg const cpp_token *
   3377  1.1  mrg _cpp_lex_token (cpp_reader *pfile)
   3378  1.1  mrg {
   3379  1.1  mrg   cpp_token *result;
   3380  1.1  mrg 
   3381  1.1  mrg   for (;;)
   3382  1.1  mrg     {
                        /* The current run is used up: move to the start of the next.  */
   3383  1.1  mrg       if (pfile->cur_token == pfile->cur_run->limit)
   3384  1.1  mrg 	{
   3385  1.1  mrg 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
   3386  1.1  mrg 	  pfile->cur_token = pfile->cur_run->base;
   3387  1.1  mrg 	}
   3388  1.1  mrg       /* We assume that the current token is somewhere in the current
   3389  1.1  mrg 	 run.  */
   3390  1.1  mrg       if (pfile->cur_token < pfile->cur_run->base
   3391  1.1  mrg 	  || pfile->cur_token >= pfile->cur_run->limit)
   3392  1.1  mrg 	abort ();
   3393  1.1  mrg 
                        /* Tokens counted in pfile->lookaheads already sit in the run;
                           hand those out before lexing anything new.  */
   3394  1.1  mrg       if (pfile->lookaheads)
   3395  1.1  mrg 	{
   3396  1.1  mrg 	  pfile->lookaheads--;
   3397  1.1  mrg 	  result = pfile->cur_token++;
   3398  1.1  mrg 	}
   3399  1.1  mrg       else
   3400  1.1  mrg 	result = _cpp_lex_direct (pfile);
   3401  1.1  mrg 
                        /* Tokens carrying the BOL flag get the directive, deferred
                           pragma and module control-line checks below.  */
   3402  1.1  mrg       if (result->flags & BOL)
   3403  1.1  mrg 	{
   3404  1.1  mrg 	  /* Is this a directive.  If _cpp_handle_directive returns
   3405  1.1  mrg 	     false, it is an assembler #.  */
   3406  1.1  mrg 	  if (result->type == CPP_HASH
   3407  1.1  mrg 	      /* 6.10.3 p 11: Directives in a list of macro arguments
   3408  1.1  mrg 		 gives undefined behavior.  This implementation
   3409  1.1  mrg 		 handles the directive as normal.  */
   3410  1.1  mrg 	      && pfile->state.parsing_args != 1)
   3411  1.1  mrg 	    {
   3412  1.1  mrg 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
   3413  1.1  mrg 		{
                                  /* A CPP_PADDING result means the directive produced no
                                     token to return; go lex the next one.  */
   3414  1.1  mrg 		  if (pfile->directive_result.type == CPP_PADDING)
   3415  1.1  mrg 		    continue;
   3416  1.1  mrg 		  result = &pfile->directive_result;
   3417  1.1  mrg 		}
   3418  1.1  mrg 	    }
   3419  1.1  mrg 	  else if (pfile->state.in_deferred_pragma)
   3420  1.1  mrg 	    result = &pfile->directive_result;
   3421  1.1  mrg 	  else if (result->type == CPP_NAME
   3422  1.1  mrg 		   && (result->val.node.node->flags & NODE_MODULE)
   3423  1.1  mrg 		   && !pfile->state.skipping
   3424  1.1  mrg 		   /* Unlike regular directives, we do not deal with
   3425  1.1  mrg 		      tokenizing module directives as macro arguments.
   3426  1.1  mrg 		      That's not permitted.  */
   3427  1.1  mrg 		   && !pfile->state.parsing_args)
   3428  1.1  mrg 	    {
   3429  1.1  mrg 	      /* P1857.  Before macro expansion, At start of logical
   3430  1.1  mrg 		 line ... */
   3431  1.1  mrg 	      /* We don't have to consider lookaheads at this point.  */
   3432  1.1  mrg 	      gcc_checking_assert (!pfile->lookaheads);
   3433  1.1  mrg 
   3434  1.1  mrg 	      cpp_maybe_module_directive (pfile, result);
   3435  1.1  mrg 	    }
   3436  1.1  mrg 
                            /* Report the line change to the client unless tokens are
                               currently being skipped.  */
   3437  1.1  mrg 	  if (pfile->cb.line_change && !pfile->state.skipping)
   3438  1.1  mrg 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
   3439  1.1  mrg 	}
   3440  1.1  mrg 
   3441  1.1  mrg       /* We don't skip tokens in directives.  */
   3442  1.1  mrg       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
   3443  1.1  mrg 	break;
   3444  1.1  mrg 
   3445  1.1  mrg       /* Outside a directive, invalidate controlling macros.  At file
   3446  1.1  mrg 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
   3447  1.1  mrg 	 get here and MI optimization works.  */
   3448  1.1  mrg       pfile->mi_valid = false;
   3449  1.1  mrg 
                        /* While skipping, only CPP_EOF is allowed to end the loop.  */
   3450  1.1  mrg       if (!pfile->state.skipping || result->type == CPP_EOF)
   3451  1.1  mrg 	break;
   3452  1.1  mrg     }
   3453  1.1  mrg 
   3454  1.1  mrg   return result;
   3455  1.1  mrg }
   3456  1.1  mrg 
   3457  1.1  mrg /* Returns true if a fresh line has been loaded.  */
   3458  1.1  mrg bool
   3459  1.1  mrg _cpp_get_fresh_line (cpp_reader *pfile)
   3460  1.1  mrg {
   3461  1.1  mrg   /* We can't get a new line until we leave the current directive.  */
   3462  1.1  mrg   if (pfile->state.in_directive)
   3463  1.1  mrg     return false;
   3464  1.1  mrg 
   3465  1.1  mrg   for (;;)
   3466  1.1  mrg     {
   3467  1.1  mrg       cpp_buffer *buffer = pfile->buffer;
   3468  1.1  mrg 
   3469  1.1  mrg       if (!buffer->need_line)
   3470  1.1  mrg 	return true;
   3471  1.1  mrg 
   3472  1.1  mrg       if (buffer->next_line < buffer->rlimit)
   3473  1.1  mrg 	{
   3474  1.1  mrg 	  _cpp_clean_line (pfile);
   3475  1.1  mrg 	  return true;
   3476  1.1  mrg 	}
   3477  1.1  mrg 
   3478  1.1  mrg       /* First, get out of parsing arguments state.  */
   3479  1.1  mrg       if (pfile->state.parsing_args)
   3480  1.1  mrg 	return false;
   3481  1.1  mrg 
   3482  1.1  mrg       /* End of buffer.  Non-empty files should end in a newline.  */
   3483  1.1  mrg       if (buffer->buf != buffer->rlimit
   3484  1.1  mrg 	  && buffer->next_line > buffer->rlimit
   3485  1.1  mrg 	  && !buffer->from_stage3)
   3486  1.1  mrg 	{
   3487  1.1  mrg 	  /* Clip to buffer size.  */
   3488  1.1  mrg 	  buffer->next_line = buffer->rlimit;
   3489  1.1  mrg 	}
   3490  1.1  mrg 
   3491  1.1  mrg       if (buffer->prev && !buffer->return_at_eof)
   3492  1.1  mrg 	_cpp_pop_buffer (pfile);
   3493  1.1  mrg       else
   3494  1.1  mrg 	{
   3495  1.1  mrg 	  /* End of translation.  Do not pop the buffer yet. Increment
   3496  1.1  mrg 	     line number so that the EOF token is on a line of its own
   3497  1.1  mrg 	     (_cpp_lex_direct doesn't increment in that case, because
   3498  1.1  mrg 	     it's hard for it to distinguish this special case). */
   3499  1.1  mrg 	  CPP_INCREMENT_LINE (pfile, 0);
   3500  1.1  mrg 	  return false;
   3501  1.1  mrg 	}
   3502  1.1  mrg     }
   3503  1.1  mrg }
   3504  1.1  mrg 
   3505  1.1  mrg #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
   3506  1.1  mrg   do							\
   3507  1.1  mrg     {							\
   3508  1.1  mrg       result->type = ELSE_TYPE;				\
   3509  1.1  mrg       if (*buffer->cur == CHAR)				\
   3510  1.1  mrg 	buffer->cur++, result->type = THEN_TYPE;	\
   3511  1.1  mrg     }							\
   3512  1.1  mrg   while (0)
   3513  1.1  mrg 
   3514  1.1  mrg /* Lex a token into pfile->cur_token, which is also incremented, to
   3515  1.1  mrg    get diagnostics pointing to the correct location.  Returns a pointer
   3516  1.1  mrg    to the lexed token.
   3517  1.1  mrg 
   3518  1.1  mrg    Does not handle issues such as token lookahead, multiple-include
   3519  1.1  mrg    optimization, directives, skipping etc.  This function is only
   3520  1.1  mrg    suitable for use by _cpp_lex_token, and in special cases like
   3521  1.1  mrg    lex_expansion_token which doesn't care for any of these issues.
   3522  1.1  mrg 
   3523  1.1  mrg    At a newline: returns CPP_PRAGMA_EOL in a deferred pragma, CPP_EOF if no
   3524  1.1  mrg    fresh line is available (e.g. in a directive), else lexes the next line.  */
   3525  1.1  mrg cpp_token *
   3526  1.1  mrg _cpp_lex_direct (cpp_reader *pfile)
   3527  1.1  mrg {
   3528  1.1  mrg   cppchar_t c;
   3529  1.1  mrg   cpp_buffer *buffer;
   3530  1.1  mrg   const unsigned char *comment_start;
   3531  1.1  mrg   bool fallthrough_comment = false;
   3532  1.1  mrg   cpp_token *result = pfile->cur_token++;  /* Claim the next token slot.  */
   3533  1.1  mrg 
   3534  1.1  mrg  fresh_line:  /* Also re-entered after each newline outside a deferred pragma.  */
   3535  1.1  mrg   result->flags = 0;
   3536  1.1  mrg   buffer = pfile->buffer;
   3537  1.1  mrg   if (buffer->need_line)
   3538  1.1  mrg     {
   3539  1.1  mrg       if (pfile->state.in_deferred_pragma)
   3540  1.1  mrg 	{
   3541  1.1  mrg 	  /* This can happen in cases like:
   3542  1.1  mrg 	     #define loop(x) whatever
   3543  1.1  mrg 	     #pragma omp loop
   3544  1.1  mrg 	     where when trying to expand loop we need to peek
   3545  1.1  mrg 	     next token after loop, but aren't still in_deferred_pragma
   3546  1.1  mrg 	     mode but are in in_directive mode, so buffer->need_line
   3547  1.1  mrg 	     is set, a CPP_EOF is peeked.  */
   3548  1.1  mrg 	  result->type = CPP_PRAGMA_EOL;
   3549  1.1  mrg 	  pfile->state.in_deferred_pragma = false;
   3550  1.1  mrg 	  if (!pfile->state.pragma_allow_expansion)
   3551  1.1  mrg 	    pfile->state.prevent_expansion--;
   3552  1.1  mrg 	  return result;
   3553  1.1  mrg 	}
   3554  1.1  mrg       if (!_cpp_get_fresh_line (pfile))
   3555  1.1  mrg 	{
   3556  1.1  mrg 	  result->type = CPP_EOF;
   3557  1.1  mrg 	  /* Not a real EOF in a directive or arg parsing -- we refuse
   3558  1.1  mrg   	     to advance to the next file now, and will once we're out
   3559  1.1  mrg   	     of those modes.  */
   3560  1.1  mrg 	  if (!pfile->state.in_directive && !pfile->state.parsing_args)
   3561  1.1  mrg 	    {
   3562  1.1  mrg 	      /* Tell the compiler the line number of the EOF token.  */
   3563  1.1  mrg 	      result->src_loc = pfile->line_table->highest_line;
   3564  1.1  mrg 	      result->flags = BOL;
   3565  1.1  mrg 	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
   3566  1.1  mrg 	      _cpp_pop_buffer (pfile);
   3567  1.1  mrg 	    }
   3568  1.1  mrg 	  return result;
   3569  1.1  mrg 	}
   3570  1.1  mrg       if (buffer != pfile->buffer)  /* pfile->buffer changed during _cpp_get_fresh_line.  */
   3571  1.1  mrg 	fallthrough_comment = false;
   3572  1.1  mrg       if (!pfile->keep_tokens)  /* Not keeping tokens: restart at the first slot of the base run.  */
   3573  1.1  mrg 	{
   3574  1.1  mrg 	  pfile->cur_run = &pfile->base_run;
   3575  1.1  mrg 	  result = pfile->base_run.base;
   3576  1.1  mrg 	  pfile->cur_token = result + 1;
   3577  1.1  mrg 	}
   3578  1.1  mrg       result->flags = BOL;
   3579  1.1  mrg       if (pfile->state.parsing_args == 2)
   3580  1.1  mrg 	result->flags |= PREV_WHITE;
   3581  1.1  mrg     }
   3582  1.1  mrg   buffer = pfile->buffer;
   3583  1.1  mrg  update_tokens_line:  /* Also re-entered after a comment that is not saved.  */
   3584  1.1  mrg   result->src_loc = pfile->line_table->highest_line;
   3585  1.1  mrg 
   3586  1.1  mrg  skipped_white:  /* Also re-entered after skipping whitespace.  */
   3587  1.1  mrg   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
   3588  1.1  mrg       && !pfile->overlaid_buffer)
   3589  1.1  mrg     {
   3590  1.1  mrg       _cpp_process_line_notes (pfile, false);
   3591  1.1  mrg       result->src_loc = pfile->line_table->highest_line;
   3592  1.1  mrg     }
   3593  1.1  mrg   c = *buffer->cur++;  /* buffer->cur now points just past C.  */
   3594  1.1  mrg 
   3595  1.1  mrg   if (pfile->forced_token_location)
   3596  1.1  mrg     result->src_loc = pfile->forced_token_location;
   3597  1.1  mrg   else
   3598  1.1  mrg     result->src_loc = linemap_position_for_column (pfile->line_table,
   3599  1.1  mrg 					  CPP_BUF_COLUMN (buffer, buffer->cur));
   3600  1.1  mrg 
   3601  1.1  mrg   switch (c)
   3602  1.1  mrg     {
   3603  1.1  mrg     case ' ': case '\t': case '\f': case '\v': case '\0':
   3604  1.1  mrg       result->flags |= PREV_WHITE;
   3605  1.1  mrg       skip_whitespace (pfile, c);
   3606  1.1  mrg       goto skipped_white;
   3607  1.1  mrg 
   3608  1.1  mrg     case '\n':
   3609  1.1  mrg       /* Increment the line, unless this is the last line ...  */
   3610  1.1  mrg       if (buffer->cur < buffer->rlimit
   3611  1.1  mrg 	  /* ... or this is a #include, (where _cpp_stack_file needs to
   3612  1.1  mrg 	     unwind by one line) ...  */
   3613  1.1  mrg 	  || (pfile->state.in_directive > 1
   3614  1.1  mrg 	      /* ... except traditional-cpp increments this elsewhere.  */
   3615  1.1  mrg 	      && !CPP_OPTION (pfile, traditional)))
   3616  1.1  mrg 	CPP_INCREMENT_LINE (pfile, 0);
   3617  1.1  mrg       buffer->need_line = true;
   3618  1.1  mrg       if (pfile->state.in_deferred_pragma)
   3619  1.1  mrg 	{
   3620  1.1  mrg 	  /* Produce the PRAGMA_EOL on this line.  File reading
   3621  1.1  mrg 	     ensures there is always a \n at end of the buffer, thus
   3622  1.1  mrg 	     in a deferred pragma we always see CPP_PRAGMA_EOL before
   3623  1.1  mrg 	     any CPP_EOF.  */
   3624  1.1  mrg 	  result->type = CPP_PRAGMA_EOL;
   3625  1.1  mrg 	  result->flags &= ~PREV_WHITE;
   3626  1.1  mrg 	  pfile->state.in_deferred_pragma = false;
   3627  1.1  mrg 	  if (!pfile->state.pragma_allow_expansion)
   3628  1.1  mrg 	    pfile->state.prevent_expansion--;
   3629  1.1  mrg 	  return result;
   3630  1.1  mrg 	}
   3631  1.1  mrg       goto fresh_line;
   3632  1.1  mrg 
   3633  1.1  mrg     case '0': case '1': case '2': case '3': case '4':
   3634  1.1  mrg     case '5': case '6': case '7': case '8': case '9':
   3635  1.1  mrg       {
   3636  1.1  mrg 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
   3637  1.1  mrg 	result->type = CPP_NUMBER;
   3638  1.1  mrg 	lex_number (pfile, &result->val.str, &nst);
   3639  1.1  mrg 	warn_about_normalization (pfile, result, &nst);
   3640  1.1  mrg 	break;
   3641  1.1  mrg       }
   3642  1.1  mrg 
   3643  1.1  mrg     case 'L':
   3644  1.1  mrg     case 'u':
   3645  1.1  mrg     case 'U':
   3646  1.1  mrg     case 'R':
   3647  1.1  mrg       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
   3648  1.1  mrg 	 wide strings or raw strings.  */
   3649  1.1  mrg       if (c == 'L' || CPP_OPTION (pfile, rliterals)
   3650  1.1  mrg 	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
   3651  1.1  mrg 	{
   3652  1.1  mrg 	  if ((*buffer->cur == '\'' && c != 'R')
   3653  1.1  mrg 	      || *buffer->cur == '"'
   3654  1.1  mrg 	      || (*buffer->cur == 'R'
   3655  1.1  mrg 		  && c != 'R'
   3656  1.1  mrg 		  && buffer->cur[1] == '"'
   3657  1.1  mrg 		  && CPP_OPTION (pfile, rliterals))
   3658  1.1  mrg 	      || (*buffer->cur == '8'
   3659  1.1  mrg 		  && c == 'u'
   3660  1.1  mrg 		  && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
   3661  1.1  mrg 				&& CPP_OPTION (pfile, utf8_char_literals)))
   3662  1.1  mrg 		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
   3663  1.1  mrg 			  && CPP_OPTION (pfile, rliterals)))))
   3664  1.1  mrg 	    {
   3665  1.1  mrg 	      lex_string (pfile, result, buffer->cur - 1);  /* buffer->cur - 1 is the prefix character C.  */
   3666  1.1  mrg 	      break;
   3667  1.1  mrg 	    }
   3668  1.1  mrg 	}
   3669  1.1  mrg       /* Fall through.  */
   3670  1.1  mrg 
   3671  1.1  mrg     case '_':
   3672  1.1  mrg     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
   3673  1.1  mrg     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
   3674  1.1  mrg     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
   3675  1.1  mrg     case 's': case 't':           case 'v': case 'w': case 'x':
   3676  1.1  mrg     case 'y': case 'z':
   3677  1.1  mrg     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
   3678  1.1  mrg     case 'G': case 'H': case 'I': case 'J': case 'K':
   3679  1.1  mrg     case 'M': case 'N': case 'O': case 'P': case 'Q':
   3680  1.1  mrg     case 'S': case 'T':           case 'V': case 'W': case 'X':
   3681  1.1  mrg     case 'Y': case 'Z':
   3682  1.1  mrg       result->type = CPP_NAME;
   3683  1.1  mrg       {
   3684  1.1  mrg 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
   3685  1.1  mrg 	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
   3686  1.1  mrg 						&nst,
   3687  1.1  mrg 						&result->val.node.spelling);
   3688  1.1  mrg 	warn_about_normalization (pfile, result, &nst);
   3689  1.1  mrg       }
   3690  1.1  mrg 
   3691  1.1  mrg       /* Convert named operators to their proper types.  */
   3692  1.1  mrg       if (result->val.node.node->flags & NODE_OPERATOR)
   3693  1.1  mrg 	{
   3694  1.1  mrg 	  result->flags |= NAMED_OP;
   3695  1.1  mrg 	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
   3696  1.1  mrg 	}
   3697  1.1  mrg 
   3698  1.1  mrg       /* Signal FALLTHROUGH comment followed by another token.  */
   3699  1.1  mrg       if (fallthrough_comment)
   3700  1.1  mrg 	result->flags |= PREV_FALLTHROUGH;
   3701  1.1  mrg       break;
   3702  1.1  mrg 
   3703  1.1  mrg     case '\'':
   3704  1.1  mrg     case '"':
   3705  1.1  mrg       lex_string (pfile, result, buffer->cur - 1);
   3706  1.1  mrg       break;
   3707  1.1  mrg 
   3708  1.1  mrg     case '/':
   3709  1.1  mrg       /* A potential block or line comment.  */
   3710  1.1  mrg       comment_start = buffer->cur;  /* Points just after the initial '/'.  */
   3711  1.1  mrg       c = *buffer->cur;
   3712  1.1  mrg 
   3713  1.1  mrg       if (c == '*')
   3714  1.1  mrg 	{
   3715  1.1  mrg 	  if (_cpp_skip_block_comment (pfile))
   3716  1.1  mrg 	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
   3717  1.1  mrg 	}
   3718  1.1  mrg       else if (c == '/' && ! CPP_OPTION (pfile, traditional))
   3719  1.1  mrg 	{
   3720  1.1  mrg 	  /* Don't warn for system headers.  */
   3721  1.1  mrg 	  if (_cpp_in_system_header (pfile))
   3722  1.1  mrg 	    ;
   3723  1.1  mrg 	  /* Warn about comments if pedantically GNUC89, and not
   3724  1.1  mrg 	     in system headers.  */
   3725  1.1  mrg 	  else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
   3726  1.1  mrg 		   && CPP_PEDANTIC (pfile)
   3727  1.1  mrg 		   && ! buffer->warned_cplusplus_comments)
   3728  1.1  mrg 	    {
   3729  1.1  mrg 	      if (cpp_error (pfile, CPP_DL_PEDWARN,
   3730  1.1  mrg 			     "C++ style comments are not allowed in ISO C90"))
   3731  1.1  mrg 		cpp_error (pfile, CPP_DL_NOTE,
   3732  1.1  mrg 			   "(this will be reported only once per input file)");
   3733  1.1  mrg 	      buffer->warned_cplusplus_comments = 1;
   3734  1.1  mrg 	    }
   3735  1.1  mrg 	  /* Or if specifically desired via -Wc90-c99-compat.  */
   3736  1.1  mrg 	  else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
   3737  1.1  mrg 		   && ! CPP_OPTION (pfile, cplusplus)
   3738  1.1  mrg 		   && ! buffer->warned_cplusplus_comments)
   3739  1.1  mrg 	    {
   3740  1.1  mrg 	      if (cpp_error (pfile, CPP_DL_WARNING,
   3741  1.1  mrg 			     "C++ style comments are incompatible with C90"))
   3742  1.1  mrg 		cpp_error (pfile, CPP_DL_NOTE,
   3743  1.1  mrg 			   "(this will be reported only once per input file)");
   3744  1.1  mrg 	      buffer->warned_cplusplus_comments = 1;
   3745  1.1  mrg 	    }
   3746  1.1  mrg 	  /* In C89/C94, C++ style comments are forbidden.  */
   3747  1.1  mrg 	  else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
   3748  1.1  mrg 		    || CPP_OPTION (pfile, lang) == CLK_STDC94))
   3749  1.1  mrg 	    {
   3750  1.1  mrg 	      /* But don't be confused about valid code such as
   3751  1.1  mrg 	         - // immediately followed by *,
   3752  1.1  mrg 		 - // in a preprocessing directive,
   3753  1.1  mrg 		 - // in an #if 0 block.  */
   3754  1.1  mrg 	      if (buffer->cur[1] == '*'
   3755  1.1  mrg 		  || pfile->state.in_directive
   3756  1.1  mrg 		  || pfile->state.skipping)
   3757  1.1  mrg 		{
   3758  1.1  mrg 		  result->type = CPP_DIV;
   3759  1.1  mrg 		  break;
   3760  1.1  mrg 		}
   3761  1.1  mrg 	      else if (! buffer->warned_cplusplus_comments)
   3762  1.1  mrg 		{
   3763  1.1  mrg 		  if (cpp_error (pfile, CPP_DL_ERROR,
   3764  1.1  mrg 				 "C++ style comments are not allowed in "
   3765  1.1  mrg 				 "ISO C90"))
   3766  1.1  mrg 		    cpp_error (pfile, CPP_DL_NOTE,
   3767  1.1  mrg 			       "(this will be reported only once per input "
   3768  1.1  mrg 			       "file)");
   3769  1.1  mrg 		  buffer->warned_cplusplus_comments = 1;
   3770  1.1  mrg 		}
   3771  1.1  mrg 	    }
   3772  1.1  mrg 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
   3773  1.1  mrg 	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
   3774  1.1  mrg 	}
   3775  1.1  mrg       else if (c == '=')
   3776  1.1  mrg 	{
   3777  1.1  mrg 	  buffer->cur++;
   3778  1.1  mrg 	  result->type = CPP_DIV_EQ;
   3779  1.1  mrg 	  break;
   3780  1.1  mrg 	}
   3781  1.1  mrg       else
   3782  1.1  mrg 	{
   3783  1.1  mrg 	  result->type = CPP_DIV;
   3784  1.1  mrg 	  break;
   3785  1.1  mrg 	}
   3786  1.1  mrg 
   3787  1.1  mrg       if (fallthrough_comment_p (pfile, comment_start))
   3788  1.1  mrg 	fallthrough_comment = true;
   3789  1.1  mrg 
   3790  1.1  mrg       if (pfile->cb.comment)
   3791  1.1  mrg 	{
   3792  1.1  mrg 	  size_t len = pfile->buffer->cur - comment_start;
   3793  1.1  mrg 	  pfile->cb.comment (pfile, result->src_loc, comment_start - 1,  /* Start at the leading '/' ...  */
   3794  1.1  mrg 			     len + 1);  /* ... and count it in the length.  */
   3795  1.1  mrg 	}
   3796  1.1  mrg 
   3797  1.1  mrg       if (!pfile->state.save_comments)
   3798  1.1  mrg 	{
   3799  1.1  mrg 	  result->flags |= PREV_WHITE;
   3800  1.1  mrg 	  goto update_tokens_line;
   3801  1.1  mrg 	}
   3802  1.1  mrg 
   3803  1.1  mrg       if (fallthrough_comment)
   3804  1.1  mrg 	result->flags |= PREV_FALLTHROUGH;
   3805  1.1  mrg 
   3806  1.1  mrg       /* Save the comment as a token in its own right.  */
   3807  1.1  mrg       save_comment (pfile, result, comment_start, c);
   3808  1.1  mrg       break;
   3809  1.1  mrg 
   3810  1.1  mrg     case '<':
   3811  1.1  mrg       if (pfile->state.angled_headers)
   3812  1.1  mrg 	{
   3813  1.1  mrg 	  lex_string (pfile, result, buffer->cur - 1);
   3814  1.1  mrg 	  if (result->type != CPP_LESS)
   3815  1.1  mrg 	    break;
   3816  1.1  mrg 	}
   3817  1.1  mrg 
   3818  1.1  mrg       result->type = CPP_LESS;
   3819  1.1  mrg       if (*buffer->cur == '=')
   3820  1.1  mrg 	{
   3821  1.1  mrg 	  buffer->cur++, result->type = CPP_LESS_EQ;
   3822  1.1  mrg 	  if (*buffer->cur == '>'
   3823  1.1  mrg 	      && CPP_OPTION (pfile, cplusplus)
   3824  1.1  mrg 	      && CPP_OPTION (pfile, lang) >= CLK_GNUCXX20)
   3825  1.1  mrg 	    buffer->cur++, result->type = CPP_SPACESHIP;
   3826  1.1  mrg 	}
   3827  1.1  mrg       else if (*buffer->cur == '<')
   3828  1.1  mrg 	{
   3829  1.1  mrg 	  buffer->cur++;
   3830  1.1  mrg 	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
   3831  1.1  mrg 	}
   3832  1.1  mrg       else if (CPP_OPTION (pfile, digraphs))
   3833  1.1  mrg 	{
   3834  1.1  mrg 	  if (*buffer->cur == ':')
   3835  1.1  mrg 	    {
   3836  1.1  mrg 	      /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
   3837  1.1  mrg 		 three characters are <:: and the subsequent character
   3838  1.1  mrg 		 is neither : nor >, the < is treated as a preprocessor
   3839  1.1  mrg 		 token by itself".  */
   3840  1.1  mrg 	      if (CPP_OPTION (pfile, cplusplus)
   3841  1.1  mrg 		  && CPP_OPTION (pfile, lang) != CLK_CXX98
   3842  1.1  mrg 		  && CPP_OPTION (pfile, lang) != CLK_GNUCXX
   3843  1.1  mrg 		  && buffer->cur[1] == ':'
   3844  1.1  mrg 		  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
   3845  1.1  mrg 		break;  /* The token stays a plain CPP_LESS.  */
   3846  1.1  mrg 
   3847  1.1  mrg 	      buffer->cur++;
   3848  1.1  mrg 	      result->flags |= DIGRAPH;
   3849  1.1  mrg 	      result->type = CPP_OPEN_SQUARE;
   3850  1.1  mrg 	    }
   3851  1.1  mrg 	  else if (*buffer->cur == '%')
   3852  1.1  mrg 	    {
   3853  1.1  mrg 	      buffer->cur++;
   3854  1.1  mrg 	      result->flags |= DIGRAPH;
   3855  1.1  mrg 	      result->type = CPP_OPEN_BRACE;
   3856  1.1  mrg 	    }
   3857  1.1  mrg 	}
   3858  1.1  mrg       break;
   3859  1.1  mrg 
   3860  1.1  mrg     case '>':
   3861  1.1  mrg       result->type = CPP_GREATER;
   3862  1.1  mrg       if (*buffer->cur == '=')
   3863  1.1  mrg 	buffer->cur++, result->type = CPP_GREATER_EQ;
   3864  1.1  mrg       else if (*buffer->cur == '>')
   3865  1.1  mrg 	{
   3866  1.1  mrg 	  buffer->cur++;
   3867  1.1  mrg 	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
   3868  1.1  mrg 	}
   3869  1.1  mrg       break;
   3870  1.1  mrg 
   3871  1.1  mrg     case '%':
   3872  1.1  mrg       result->type = CPP_MOD;
   3873  1.1  mrg       if (*buffer->cur == '=')
   3874  1.1  mrg 	buffer->cur++, result->type = CPP_MOD_EQ;
   3875  1.1  mrg       else if (CPP_OPTION (pfile, digraphs))
   3876  1.1  mrg 	{
   3877  1.1  mrg 	  if (*buffer->cur == ':')
   3878  1.1  mrg 	    {
   3879  1.1  mrg 	      buffer->cur++;
   3880  1.1  mrg 	      result->flags |= DIGRAPH;
   3881  1.1  mrg 	      result->type = CPP_HASH;
   3882  1.1  mrg 	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
   3883  1.1  mrg 		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
   3884  1.1  mrg 	    }
   3885  1.1  mrg 	  else if (*buffer->cur == '>')
   3886  1.1  mrg 	    {
   3887  1.1  mrg 	      buffer->cur++;
   3888  1.1  mrg 	      result->flags |= DIGRAPH;
   3889  1.1  mrg 	      result->type = CPP_CLOSE_BRACE;
   3890  1.1  mrg 	    }
   3891  1.1  mrg 	}
   3892  1.1  mrg       break;
   3893  1.1  mrg 
   3894  1.1  mrg     case '.':
   3895  1.1  mrg       result->type = CPP_DOT;
   3896  1.1  mrg       if (ISDIGIT (*buffer->cur))
   3897  1.1  mrg 	{
   3898  1.1  mrg 	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
   3899  1.1  mrg 	  result->type = CPP_NUMBER;
   3900  1.1  mrg 	  lex_number (pfile, &result->val.str, &nst);
   3901  1.1  mrg 	  warn_about_normalization (pfile, result, &nst);
   3902  1.1  mrg 	}
   3903  1.1  mrg       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
   3904  1.1  mrg 	buffer->cur += 2, result->type = CPP_ELLIPSIS;
   3905  1.1  mrg       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
   3906  1.1  mrg 	buffer->cur++, result->type = CPP_DOT_STAR;
   3907  1.1  mrg       break;
   3908  1.1  mrg 
   3909  1.1  mrg     case '+':
   3910  1.1  mrg       result->type = CPP_PLUS;
   3911  1.1  mrg       if (*buffer->cur == '+')
   3912  1.1  mrg 	buffer->cur++, result->type = CPP_PLUS_PLUS;
   3913  1.1  mrg       else if (*buffer->cur == '=')
   3914  1.1  mrg 	buffer->cur++, result->type = CPP_PLUS_EQ;
   3915  1.1  mrg       break;
   3916  1.1  mrg 
   3917  1.1  mrg     case '-':
   3918  1.1  mrg       result->type = CPP_MINUS;
   3919  1.1  mrg       if (*buffer->cur == '>')
   3920  1.1  mrg 	{
   3921  1.1  mrg 	  buffer->cur++;
   3922  1.1  mrg 	  result->type = CPP_DEREF;
   3923  1.1  mrg 	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
   3924  1.1  mrg 	    buffer->cur++, result->type = CPP_DEREF_STAR;
   3925  1.1  mrg 	}
   3926  1.1  mrg       else if (*buffer->cur == '-')
   3927  1.1  mrg 	buffer->cur++, result->type = CPP_MINUS_MINUS;
   3928  1.1  mrg       else if (*buffer->cur == '=')
   3929  1.1  mrg 	buffer->cur++, result->type = CPP_MINUS_EQ;
   3930  1.1  mrg       break;
   3931  1.1  mrg 
   3932  1.1  mrg     case '&':
   3933  1.1  mrg       result->type = CPP_AND;
   3934  1.1  mrg       if (*buffer->cur == '&')
   3935  1.1  mrg 	buffer->cur++, result->type = CPP_AND_AND;
   3936  1.1  mrg       else if (*buffer->cur == '=')
   3937  1.1  mrg 	buffer->cur++, result->type = CPP_AND_EQ;
   3938  1.1  mrg       break;
   3939  1.1  mrg 
   3940  1.1  mrg     case '|':
   3941  1.1  mrg       result->type = CPP_OR;
   3942  1.1  mrg       if (*buffer->cur == '|')
   3943  1.1  mrg 	buffer->cur++, result->type = CPP_OR_OR;
   3944  1.1  mrg       else if (*buffer->cur == '=')
   3945  1.1  mrg 	buffer->cur++, result->type = CPP_OR_EQ;
   3946  1.1  mrg       break;
   3947  1.1  mrg 
   3948  1.1  mrg     case ':':
   3949  1.3  mrg       result->type = CPP_COLON;
   3950  1.3  mrg       if (*buffer->cur == ':')
   3951  1.3  mrg 	{
   3952  1.3  mrg 	  if (CPP_OPTION (pfile, scope))
   3953  1.3  mrg 	    buffer->cur++, result->type = CPP_SCOPE;
   3954  1.3  mrg 	  else
   3955  1.3  mrg 	    result->flags |= COLON_SCOPE;  /* Stays CPP_COLON; only flag that another ':' follows.  */
   3956  1.1  mrg 	}
   3957  1.1  mrg       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
   3958  1.1  mrg 	{
   3959  1.1  mrg 	  buffer->cur++;
   3960  1.1  mrg 	  result->flags |= DIGRAPH;
   3961  1.1  mrg 	  result->type = CPP_CLOSE_SQUARE;
   3962  1.1  mrg 	}
   3963  1.1  mrg       break;
   3964  1.1  mrg 
   3965  1.1  mrg     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
   3966  1.1  mrg     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
   3967  1.1  mrg     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
   3968  1.1  mrg     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
   3969  1.1  mrg     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
   3970  1.1  mrg 
   3971  1.1  mrg     case '?': result->type = CPP_QUERY; break;
   3972  1.1  mrg     case '~': result->type = CPP_COMPL; break;
   3973  1.1  mrg     case ',': result->type = CPP_COMMA; break;
   3974  1.1  mrg     case '(': result->type = CPP_OPEN_PAREN; break;
   3975  1.1  mrg     case ')': result->type = CPP_CLOSE_PAREN; break;
   3976  1.1  mrg     case '[': result->type = CPP_OPEN_SQUARE; break;
   3977  1.1  mrg     case ']': result->type = CPP_CLOSE_SQUARE; break;
   3978  1.1  mrg     case '{': result->type = CPP_OPEN_BRACE; break;
   3979  1.1  mrg     case '}': result->type = CPP_CLOSE_BRACE; break;
   3980  1.1  mrg     case ';': result->type = CPP_SEMICOLON; break;
   3981  1.1  mrg 
   3982  1.1  mrg       /* @ is a punctuator in Objective-C.  */
   3983  1.1  mrg     case '@': result->type = CPP_ATSIGN; break;
   3984  1.1  mrg 
   3985  1.1  mrg     default:
   3986  1.1  mrg       {
   3987  1.1  mrg 	const uchar *base = --buffer->cur;  /* Back up so BASE points at C itself.  */
   3988  1.1  mrg 
   3989  1.1  mrg 	/* Check for an extended identifier ($ or UCN or UTF-8).  */
   3990  1.1  mrg 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
   3991  1.1  mrg 	if (forms_identifier_p (pfile, true, &nst))
   3992  1.1  mrg 	  {
   3993  1.1  mrg 	    result->type = CPP_NAME;
   3994  1.1  mrg 	    result->val.node.node = lex_identifier (pfile, base, true, &nst,
   3995  1.1  mrg 						    &result->val.node.spelling);
   3996  1.1  mrg 	    warn_about_normalization (pfile, result, &nst);
   3997  1.1  mrg 	    break;
   3998  1.1  mrg 	  }
   3999  1.1  mrg 
   4000  1.1  mrg 	/* Otherwise this will form a CPP_OTHER token.  Parse valid UTF-8 as a
   4001  1.1  mrg 	   single token.  */
   4002  1.1  mrg 	buffer->cur++;  /* Step past C again; BASE still marks the token start.  */
   4003  1.1  mrg 	if (c >= utf8_signifier)
   4004  1.1  mrg 	  {
   4005  1.1  mrg 	    const uchar *pstr = base;
   4006  1.1  mrg 	    cppchar_t s;
   4007  1.1  mrg 	    if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s))
   4008  1.1  mrg 	      buffer->cur = pstr;
   4009  1.1  mrg 	  }
   4010  1.1  mrg 	create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
   4011  1.1  mrg 	break;
   4012  1.1  mrg       }
   4013  1.1  mrg 
   4014  1.1  mrg     }
   4015  1.1  mrg 
   4016  1.1  mrg   /* Potentially convert the location of the token to a range.  */
   4017  1.1  mrg   if (result->src_loc >= RESERVED_LOCATION_COUNT
   4018  1.1  mrg       && result->type != CPP_EOF)
   4019  1.1  mrg     {
   4020  1.1  mrg       /* Ensure that any line notes are processed, so that we have the
   4021  1.1  mrg 	 correct physical line/column for the end-point of the token even
   4022  1.1  mrg 	 when a logical line is split via one or more backslashes.  */
   4023  1.1  mrg       if (buffer->cur >= buffer->notes[buffer->cur_note].pos
   4024  1.1  mrg 	  && !pfile->overlaid_buffer)
   4025  1.1  mrg 	_cpp_process_line_notes (pfile, false);
   4026  1.1  mrg 
   4027  1.1  mrg       source_range tok_range;
   4028  1.1  mrg       tok_range.m_start = result->src_loc;  /* The location recorded when the token's first character was read.  */
   4029  1.1  mrg       tok_range.m_finish
   4030  1.1  mrg 	= linemap_position_for_column (pfile->line_table,
   4031  1.1  mrg 				       CPP_BUF_COLUMN (buffer, buffer->cur));
   4032  1.1  mrg 
   4033  1.1  mrg       result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
   4034  1.1  mrg 					       result->src_loc,
   4035  1.1  mrg 					       tok_range, NULL);
   4036  1.1  mrg     }
   4037  1.1  mrg 
   4038  1.1  mrg   return result;
   4039  1.1  mrg }
   4040  1.1  mrg 
   4041  1.1  mrg /* An upper bound on the number of bytes needed to spell TOKEN.
   4042  1.1  mrg    Does not include preceding whitespace.  */
   4043  1.1  mrg unsigned int
   4044  1.1  mrg cpp_token_len (const cpp_token *token)
   4045  1.1  mrg {
   4046  1.1  mrg   unsigned int len;
   4047  1.1  mrg 
   4048  1.1  mrg   switch (TOKEN_SPELL (token))
   4049  1.1  mrg     {
   4050  1.1  mrg     default:		len = 6;				break;
   4051  1.1  mrg     case SPELL_LITERAL:	len = token->val.str.len;		break;
   4052  1.1  mrg     case SPELL_IDENT:	len = NODE_LEN (token->val.node.node) * 10;	break;
   4053  1.1  mrg     }
   4054  1.1  mrg 
   4055  1.1  mrg   return len;
   4056  1.1  mrg }
   4057  1.1  mrg 
   4058  1.1  mrg /* Parse UTF-8 out of NAMEP and place a \U escape in BUFFER.
   4059  1.1  mrg    Return the number of bytes read out of NAME.  (There are always
   4060  1.1  mrg    10 bytes written to BUFFER.)  */
   4061  1.1  mrg 
   4062  1.1  mrg static size_t
   4063  1.1  mrg utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
   4064  1.1  mrg {
   4065  1.1  mrg   int j;
   4066  1.1  mrg   int ucn_len = 0;
   4067  1.1  mrg   int ucn_len_c;
   4068  1.1  mrg   unsigned t;
   4069  1.1  mrg   unsigned long utf32;
   4070  1.1  mrg 
   4071  1.1  mrg   /* Compute the length of the UTF-8 sequence.  */
   4072  1.1  mrg   for (t = *name; t & 0x80; t <<= 1)
   4073  1.1  mrg     ucn_len++;
   4074  1.1  mrg 
   4075  1.1  mrg   utf32 = *name & (0x7F >> ucn_len);
   4076  1.1  mrg   for (ucn_len_c = 1; ucn_len_c < ucn_len; ucn_len_c++)
   4077  1.1  mrg     {
   4078  1.1  mrg       utf32 = (utf32 << 6) | (*++name & 0x3F);
   4079  1.1  mrg 
   4080  1.1  mrg       /* Ill-formed UTF-8.  */
   4081  1.1  mrg       if ((*name & ~0x3F) != 0x80)
   4082  1.1  mrg 	abort ();
   4083  1.1  mrg     }
   4084  1.1  mrg 
   4085  1.1  mrg   *buffer++ = '\\';
   4086  1.1  mrg   *buffer++ = 'U';
   4087  1.1  mrg   for (j = 7; j >= 0; j--)
   4088  1.1  mrg     *buffer++ = "0123456789abcdef"[(utf32 >> (4 * j)) & 0xF];
   4089  1.1  mrg   return ucn_len;
   4090  1.1  mrg }
   4091  1.1  mrg 
   4092  1.1  mrg /* Given a token TYPE corresponding to a digraph, return a pointer to
   4093  1.1  mrg    the spelling of the digraph.  */
   4094  1.1  mrg static const unsigned char *
   4095  1.1  mrg cpp_digraph2name (enum cpp_ttype type)
   4096  1.1  mrg {
   4097  1.1  mrg   return digraph_spellings[(int) type - (int) CPP_FIRST_DIGRAPH];
   4098  1.1  mrg }
   4099  1.1  mrg 
   4100  1.1  mrg /* Write the spelling of an identifier IDENT, using UCNs, to BUFFER.
   4101  1.1  mrg    The buffer must already contain the enough space to hold the
   4102  1.1  mrg    token's spelling.  Returns a pointer to the character after the
   4103  1.1  mrg    last character written.  */
   4104  1.1  mrg unsigned char *
   4105  1.1  mrg _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
   4106  1.1  mrg {
   4107  1.1  mrg   size_t i;
   4108  1.1  mrg   const unsigned char *name = NODE_NAME (ident);
   4109  1.1  mrg 
   4110  1.1  mrg   for (i = 0; i < NODE_LEN (ident); i++)
   4111  1.1  mrg     if (name[i] & ~0x7F)
   4112  1.1  mrg       {
   4113  1.1  mrg 	i += utf8_to_ucn (buffer, name + i) - 1;
   4114  1.1  mrg 	buffer += 10;
   4115  1.1  mrg       }
   4116  1.1  mrg     else
   4117  1.1  mrg       *buffer++ = name[i];
   4118  1.1  mrg 
   4119  1.1  mrg   return buffer;
   4120  1.1  mrg }
   4121  1.1  mrg 
   4122  1.1  mrg /* Write the spelling of a token TOKEN to BUFFER.  The buffer must
   4123  1.1  mrg    already contain the enough space to hold the token's spelling.
   4124  1.1  mrg    Returns a pointer to the character after the last character written.
   4125  1.1  mrg    FORSTRING is true if this is to be the spelling after translation
   4126  1.1  mrg    phase 1 (with the original spelling of extended identifiers), false
   4127  1.1  mrg    if extended identifiers should always be written using UCNs (there is
   4128  1.1  mrg    no option for always writing them in the internal UTF-8 form).
   4129  1.1  mrg    FIXME: Would be nice if we didn't need the PFILE argument.  */
   4130  1.1  mrg unsigned char *
   4131  1.1  mrg cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
   4132  1.1  mrg 		 unsigned char *buffer, bool forstring)
   4133  1.1  mrg {
   4134  1.1  mrg   switch (TOKEN_SPELL (token))
   4135  1.1  mrg     {
   4136  1.1  mrg     case SPELL_OPERATOR:
   4137  1.1  mrg       {
   4138  1.1  mrg 	const unsigned char *spelling;
   4139  1.1  mrg 	unsigned char c;
   4140  1.1  mrg 
   4141  1.1  mrg 	if (token->flags & DIGRAPH)
   4142  1.1  mrg 	  spelling = cpp_digraph2name (token->type);
   4143  1.1  mrg 	else if (token->flags & NAMED_OP)
   4144  1.1  mrg 	  goto spell_ident;
   4145  1.1  mrg 	else
   4146  1.1  mrg 	  spelling = TOKEN_NAME (token);
   4147  1.1  mrg 
   4148  1.1  mrg 	while ((c = *spelling++) != '\0')
   4149  1.1  mrg 	  *buffer++ = c;
   4150  1.1  mrg       }
   4151  1.1  mrg       break;
   4152  1.1  mrg 
   4153  1.1  mrg     spell_ident:
   4154  1.1  mrg     case SPELL_IDENT:
   4155  1.1  mrg       if (forstring)
   4156  1.1  mrg 	{
   4157  1.1  mrg 	  memcpy (buffer, NODE_NAME (token->val.node.spelling),
   4158  1.1  mrg 		  NODE_LEN (token->val.node.spelling));
   4159  1.1  mrg 	  buffer += NODE_LEN (token->val.node.spelling);
   4160  1.1  mrg 	}
   4161  1.1  mrg       else
   4162  1.1  mrg 	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
   4163  1.1  mrg       break;
   4164  1.1  mrg 
   4165  1.1  mrg     case SPELL_LITERAL:
   4166  1.1  mrg       memcpy (buffer, token->val.str.text, token->val.str.len);
   4167  1.1  mrg       buffer += token->val.str.len;
   4168  1.1  mrg       break;
   4169  1.1  mrg 
   4170  1.1  mrg     case SPELL_NONE:
   4171  1.1  mrg       cpp_error (pfile, CPP_DL_ICE,
   4172  1.1  mrg 		 "unspellable token %s", TOKEN_NAME (token));
   4173  1.1  mrg       break;
   4174  1.1  mrg     }
   4175  1.1  mrg 
   4176  1.1  mrg   return buffer;
   4177  1.1  mrg }
   4178  1.1  mrg 
   4179  1.1  mrg /* Returns TOKEN spelt as a null-terminated string.  The string is
   4180  1.1  mrg    freed when the reader is destroyed.  Useful for diagnostics.  */
   4181  1.1  mrg unsigned char *
   4182  1.1  mrg cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
   4183  1.1  mrg {
   4184  1.1  mrg   unsigned int len = cpp_token_len (token) + 1;
   4185  1.1  mrg   unsigned char *start = _cpp_unaligned_alloc (pfile, len), *end;
   4186  1.1  mrg 
   4187  1.1  mrg   end = cpp_spell_token (pfile, token, start, false);
   4188  1.1  mrg   end[0] = '\0';
   4189  1.1  mrg 
   4190  1.1  mrg   return start;
   4191  1.1  mrg }
   4192  1.1  mrg 
   4193  1.1  mrg /* Returns a pointer to a string which spells the token defined by
   4194  1.1  mrg    TYPE and FLAGS.  Used by C front ends, which really should move to
   4195  1.1  mrg    using cpp_token_as_text.  */
   4196  1.1  mrg const char *
   4197  1.1  mrg cpp_type2name (enum cpp_ttype type, unsigned char flags)
   4198  1.1  mrg {
   4199  1.1  mrg   if (flags & DIGRAPH)
   4200  1.1  mrg     return (const char *) cpp_digraph2name (type);
   4201  1.1  mrg   else if (flags & NAMED_OP)
   4202  1.1  mrg     return cpp_named_operator2name (type);
   4203  1.1  mrg 
   4204  1.1  mrg   return (const char *) token_spellings[type].name;
   4205  1.1  mrg }
   4206  1.1  mrg 
   4207  1.1  mrg /* Writes the spelling of token to FP, without any preceding space.
   4208  1.1  mrg    Separated from cpp_spell_token for efficiency - to avoid stdio
   4209  1.1  mrg    double-buffering.  */
   4210  1.1  mrg void
   4211  1.1  mrg cpp_output_token (const cpp_token *token, FILE *fp)
   4212  1.1  mrg {
   4213  1.1  mrg   switch (TOKEN_SPELL (token))
   4214  1.1  mrg     {
   4215  1.1  mrg     case SPELL_OPERATOR:
   4216  1.1  mrg       {
   4217  1.1  mrg 	const unsigned char *spelling;
   4218  1.1  mrg 	int c;
   4219  1.1  mrg 
   4220  1.1  mrg 	if (token->flags & DIGRAPH)
   4221  1.1  mrg 	  spelling = cpp_digraph2name (token->type);
   4222  1.1  mrg 	else if (token->flags & NAMED_OP)
   4223  1.1  mrg 	  goto spell_ident;
   4224  1.1  mrg 	else
   4225  1.1  mrg 	  spelling = TOKEN_NAME (token);
   4226  1.1  mrg 
   4227  1.1  mrg 	c = *spelling;
   4228  1.1  mrg 	do
   4229  1.1  mrg 	  putc (c, fp);
   4230  1.1  mrg 	while ((c = *++spelling) != '\0');
   4231  1.1  mrg       }
   4232  1.1  mrg       break;
   4233  1.1  mrg 
   4234  1.1  mrg     spell_ident:
   4235  1.1  mrg     case SPELL_IDENT:
   4236  1.1  mrg       {
   4237  1.1  mrg 	size_t i;
   4238  1.1  mrg 	const unsigned char * name = NODE_NAME (token->val.node.node);
   4239  1.1  mrg 
   4240  1.1  mrg 	for (i = 0; i < NODE_LEN (token->val.node.node); i++)
   4241  1.1  mrg 	  if (name[i] & ~0x7F)
   4242  1.1  mrg 	    {
   4243  1.1  mrg 	      unsigned char buffer[10];
   4244  1.1  mrg 	      i += utf8_to_ucn (buffer, name + i) - 1;
   4245  1.1  mrg 	      fwrite (buffer, 1, 10, fp);
   4246  1.1  mrg 	    }
   4247  1.1  mrg 	  else
   4248  1.1  mrg 	    fputc (NODE_NAME (token->val.node.node)[i], fp);
   4249  1.1  mrg       }
   4250  1.1  mrg       break;
   4251  1.1  mrg 
   4252  1.1  mrg     case SPELL_LITERAL:
   4253  1.1  mrg       if (token->type == CPP_HEADER_NAME)
   4254  1.1  mrg 	fputc ('"', fp);
   4255  1.1  mrg       fwrite (token->val.str.text, 1, token->val.str.len, fp);
   4256  1.1  mrg       if (token->type == CPP_HEADER_NAME)
   4257  1.1  mrg 	fputc ('"', fp);
   4258  1.1  mrg       break;
   4259  1.1  mrg 
   4260  1.1  mrg     case SPELL_NONE:
   4261  1.1  mrg       /* An error, most probably.  */
   4262  1.1  mrg       break;
   4263  1.1  mrg     }
   4264  1.1  mrg }
   4265  1.1  mrg 
   4266  1.1  mrg /* Compare two tokens.  */
   4267  1.1  mrg int
   4268  1.1  mrg _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
   4269  1.1  mrg {
   4270  1.1  mrg   if (a->type == b->type && a->flags == b->flags)
   4271  1.1  mrg     switch (TOKEN_SPELL (a))
   4272  1.1  mrg       {
   4273  1.1  mrg       default:			/* Keep compiler happy.  */
   4274  1.1  mrg       case SPELL_OPERATOR:
   4275  1.1  mrg 	/* token_no is used to track where multiple consecutive ##
   4276  1.1  mrg 	   tokens were originally located.  */
   4277  1.1  mrg 	return (a->type != CPP_PASTE || a->val.token_no == b->val.token_no);
   4278  1.1  mrg       case SPELL_NONE:
   4279  1.1  mrg 	return (a->type != CPP_MACRO_ARG
   4280  1.1  mrg 		|| (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
   4281  1.1  mrg 		    && a->val.macro_arg.spelling == b->val.macro_arg.spelling));
   4282  1.1  mrg       case SPELL_IDENT:
   4283  1.1  mrg 	return (a->val.node.node == b->val.node.node
   4284  1.1  mrg 		&& a->val.node.spelling == b->val.node.spelling);
   4285  1.1  mrg       case SPELL_LITERAL:
   4286  1.1  mrg 	return (a->val.str.len == b->val.str.len
   4287  1.1  mrg 		&& !memcmp (a->val.str.text, b->val.str.text,
   4288  1.1  mrg 			    a->val.str.len));
   4289  1.1  mrg       }
   4290  1.1  mrg 
   4291  1.1  mrg   return 0;
   4292  1.1  mrg }
   4293  1.1  mrg 
   4294  1.1  mrg /* Returns nonzero if a space should be inserted to avoid an
   4295  1.1  mrg    accidental token paste for output.  For simplicity, it is
   4296  1.1  mrg    conservative, and occasionally advises a space where one is not
   4297  1.1  mrg    needed, e.g. "." and ".2".  */
   4298  1.1  mrg int
   4299  1.1  mrg cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
   4300  1.1  mrg 		 const cpp_token *token2)
   4301  1.1  mrg {
   4302  1.1  mrg   enum cpp_ttype a = token1->type, b = token2->type;
   4303  1.1  mrg   cppchar_t c;
   4304  1.1  mrg 
   4305  1.1  mrg   if (token1->flags & NAMED_OP)
   4306  1.1  mrg     a = CPP_NAME;
   4307  1.1  mrg   if (token2->flags & NAMED_OP)
   4308  1.1  mrg     b = CPP_NAME;
   4309  1.1  mrg 
   4310  1.1  mrg   c = EOF;
   4311  1.1  mrg   if (token2->flags & DIGRAPH)
   4312  1.1  mrg     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
   4313  1.1  mrg   else if (token_spellings[b].category == SPELL_OPERATOR)
   4314  1.1  mrg     c = token_spellings[b].name[0];
   4315  1.1  mrg 
   4316  1.1  mrg   /* Quickly get everything that can paste with an '='.  */
   4317  1.1  mrg   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
   4318  1.1  mrg     return 1;
   4319  1.1  mrg 
   4320  1.1  mrg   switch (a)
   4321  1.1  mrg     {
   4322  1.1  mrg     case CPP_GREATER:	return c == '>';
   4323  1.1  mrg     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
   4324  1.1  mrg     case CPP_PLUS:	return c == '+';
   4325  1.1  mrg     case CPP_MINUS:	return c == '-' || c == '>';
   4326  1.1  mrg     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
   4327  1.1  mrg     case CPP_MOD:	return c == ':' || c == '>';
   4328  1.1  mrg     case CPP_AND:	return c == '&';
   4329  1.1  mrg     case CPP_OR:	return c == '|';
   4330  1.1  mrg     case CPP_COLON:	return c == ':' || c == '>';
   4331  1.1  mrg     case CPP_DEREF:	return c == '*';
   4332  1.1  mrg     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
   4333  1.1  mrg     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
   4334  1.1  mrg     case CPP_PRAGMA:
   4335  1.1  mrg     case CPP_NAME:	return ((b == CPP_NUMBER
   4336  1.1  mrg 				 && name_p (pfile, &token2->val.str))
   4337  1.1  mrg 				|| b == CPP_NAME
   4338  1.1  mrg 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
   4339  1.1  mrg     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
   4340  1.1  mrg 				|| b == CPP_CHAR
   4341  1.1  mrg 				|| c == '.' || c == '+' || c == '-');
   4342  1.1  mrg 				      /* UCNs */
   4343  1.1  mrg     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
   4344  1.1  mrg 				 && b == CPP_NAME)
   4345  1.1  mrg 				|| (CPP_OPTION (pfile, objc)
   4346  1.1  mrg 				    && token1->val.str.text[0] == '@'
   4347  1.1  mrg 				    && (b == CPP_NAME || b == CPP_STRING)));
   4348  1.1  mrg     case CPP_LESS_EQ:	return c == '>';
   4349  1.1  mrg     case CPP_STRING:
   4350  1.1  mrg     case CPP_WSTRING:
   4351  1.1  mrg     case CPP_UTF8STRING:
   4352  1.1  mrg     case CPP_STRING16:
   4353  1.1  mrg     case CPP_STRING32:	return (CPP_OPTION (pfile, user_literals)
   4354  1.1  mrg 				&& (b == CPP_NAME
   4355  1.1  mrg 				    || (TOKEN_SPELL (token2) == SPELL_LITERAL
   4356  1.1  mrg 					&& ISIDST (token2->val.str.text[0]))));
   4357  1.1  mrg 
   4358  1.1  mrg     default:		break;
   4359  1.1  mrg     }
   4360  1.1  mrg 
   4361  1.1  mrg   return 0;
   4362  1.1  mrg }
   4363  1.1  mrg 
   4364  1.1  mrg /* Output all the remaining tokens on the current line, and a newline
   4365  1.1  mrg    character, to FP.  Leading whitespace is removed.  If there are
   4366  1.1  mrg    macros, special token padding is not performed.  */
   4367  1.1  mrg void
   4368  1.1  mrg cpp_output_line (cpp_reader *pfile, FILE *fp)
   4369  1.1  mrg {
   4370  1.1  mrg   const cpp_token *token;
   4371  1.1  mrg 
   4372  1.1  mrg   token = cpp_get_token (pfile);
   4373  1.1  mrg   while (token->type != CPP_EOF)
   4374  1.1  mrg     {
   4375  1.1  mrg       cpp_output_token (token, fp);
   4376  1.1  mrg       token = cpp_get_token (pfile);
   4377  1.1  mrg       if (token->flags & PREV_WHITE)
   4378  1.1  mrg 	putc (' ', fp);
   4379  1.1  mrg     }
   4380  1.1  mrg 
   4381  1.1  mrg   putc ('\n', fp);
   4382  1.1  mrg }
   4383  1.1  mrg 
   4384  1.1  mrg /* Return a string representation of all the remaining tokens on the
   4385  1.1  mrg    current line.  The result is allocated using xmalloc and must be
   4386  1.1  mrg    freed by the caller.  */
   4387  1.1  mrg unsigned char *
   4388  1.1  mrg cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
   4389  1.1  mrg {
   4390  1.1  mrg   const cpp_token *token;
   4391  1.1  mrg   unsigned int out = dir_name ? ustrlen (dir_name) : 0;
   4392  1.1  mrg   unsigned int alloced = 120 + out;
   4393  1.1  mrg   unsigned char *result = (unsigned char *) xmalloc (alloced);
   4394  1.1  mrg 
   4395  1.1  mrg   /* If DIR_NAME is empty, there are no initial contents.  */
   4396  1.1  mrg   if (dir_name)
   4397  1.1  mrg     {
   4398  1.1  mrg       sprintf ((char *) result, "#%s ", dir_name);
   4399  1.1  mrg       out += 2;
   4400  1.1  mrg     }
   4401  1.1  mrg 
   4402  1.1  mrg   token = cpp_get_token (pfile);
   4403  1.1  mrg   while (token->type != CPP_EOF)
   4404  1.1  mrg     {
   4405  1.1  mrg       unsigned char *last;
   4406  1.1  mrg       /* Include room for a possible space and the terminating nul.  */
   4407  1.1  mrg       unsigned int len = cpp_token_len (token) + 2;
   4408  1.1  mrg 
   4409  1.1  mrg       if (out + len > alloced)
   4410  1.1  mrg 	{
   4411  1.1  mrg 	  alloced *= 2;
   4412  1.1  mrg 	  if (out + len > alloced)
   4413  1.1  mrg 	    alloced = out + len;
   4414  1.1  mrg 	  result = (unsigned char *) xrealloc (result, alloced);
   4415  1.1  mrg 	}
   4416  1.1  mrg 
   4417  1.1  mrg       last = cpp_spell_token (pfile, token, &result[out], 0);
   4418  1.1  mrg       out = last - result;
   4419  1.1  mrg 
   4420  1.1  mrg       token = cpp_get_token (pfile);
   4421  1.1  mrg       if (token->flags & PREV_WHITE)
   4422  1.1  mrg 	result[out++] = ' ';
   4423  1.1  mrg     }
   4424  1.1  mrg 
   4425  1.1  mrg   result[out] = '\0';
   4426  1.1  mrg   return result;
   4427  1.1  mrg }
   4428  1.1  mrg 
/* Memory buffers.  Changing these three constants can have a dramatic
   effect on performance.  The values here are reasonable defaults,
   but might be tuned.  If you adjust them, be sure to test across a
   range of uses of cpplib, including heavy nested function-like macro
   expansion.  Also check the change in peak memory usage (NJAMD is a
   good tool for this).

   MIN_BUFF_SIZE is the smallest payload a buffer is created with.
   BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
   will still be handed out for a request of MIN_SIZE bytes.
   EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
   BUFF is extended: MIN_EXTRA plus twice BUFF's remaining room.
   Every macro argument is parenthesized so that an arbitrary
   expression can be passed safely.  */
#define MIN_BUFF_SIZE 8000
#define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
#define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
        ((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)

#if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
  #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
#endif
   4443  1.1  mrg 
   4444  1.1  mrg /* Create a new allocation buffer.  Place the control block at the end
   4445  1.1  mrg    of the buffer, so that buffer overflows will cause immediate chaos.  */
   4446  1.1  mrg static _cpp_buff *
   4447  1.1  mrg new_buff (size_t len)
   4448  1.1  mrg {
   4449  1.1  mrg   _cpp_buff *result;
   4450  1.1  mrg   unsigned char *base;
   4451  1.1  mrg 
   4452  1.1  mrg   if (len < MIN_BUFF_SIZE)
   4453  1.1  mrg     len = MIN_BUFF_SIZE;
   4454  1.1  mrg   len = CPP_ALIGN (len);
   4455  1.1  mrg 
   4456  1.1  mrg #ifdef ENABLE_VALGRIND_ANNOTATIONS
   4457  1.1  mrg   /* Valgrind warns about uses of interior pointers, so put _cpp_buff
   4458  1.1  mrg      struct first.  */
   4459  1.1  mrg   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
   4460  1.1  mrg   base = XNEWVEC (unsigned char, len + slen);
   4461  1.1  mrg   result = (_cpp_buff *) base;
   4462  1.1  mrg   base += slen;
   4463  1.1  mrg #else
   4464  1.1  mrg   base = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
   4465  1.1  mrg   result = (_cpp_buff *) (base + len);
   4466  1.1  mrg #endif
   4467  1.1  mrg   result->base = base;
   4468  1.1  mrg   result->cur = base;
   4469  1.1  mrg   result->limit = base + len;
   4470  1.1  mrg   result->next = NULL;
   4471  1.1  mrg   return result;
   4472  1.1  mrg }
   4473  1.1  mrg 
   4474  1.1  mrg /* Place a chain of unwanted allocation buffers on the free list.  */
   4475  1.1  mrg void
   4476  1.1  mrg _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
   4477  1.1  mrg {
   4478  1.1  mrg   _cpp_buff *end = buff;
   4479  1.1  mrg 
   4480  1.1  mrg   while (end->next)
   4481  1.1  mrg     end = end->next;
   4482  1.1  mrg   end->next = pfile->free_buffs;
   4483  1.1  mrg   pfile->free_buffs = buff;
   4484  1.1  mrg }
   4485  1.1  mrg 
   4486  1.1  mrg /* Return a free buffer of size at least MIN_SIZE.  */
   4487  1.1  mrg _cpp_buff *
   4488  1.1  mrg _cpp_get_buff (cpp_reader *pfile, size_t min_size)
   4489  1.1  mrg {
   4490  1.1  mrg   _cpp_buff *result, **p;
   4491  1.1  mrg 
   4492  1.1  mrg   for (p = &pfile->free_buffs;; p = &(*p)->next)
   4493  1.1  mrg     {
   4494  1.1  mrg       size_t size;
   4495  1.1  mrg 
   4496  1.1  mrg       if (*p == NULL)
   4497  1.1  mrg 	return new_buff (min_size);
   4498  1.1  mrg       result = *p;
   4499  1.1  mrg       size = result->limit - result->base;
   4500  1.1  mrg       /* Return a buffer that's big enough, but don't waste one that's
   4501  1.1  mrg          way too big.  */
   4502  1.1  mrg       if (size >= min_size && size <= BUFF_SIZE_UPPER_BOUND (min_size))
   4503  1.1  mrg 	break;
   4504  1.1  mrg     }
   4505  1.1  mrg 
   4506  1.1  mrg   *p = result->next;
   4507  1.1  mrg   result->next = NULL;
   4508  1.1  mrg   result->cur = result->base;
   4509  1.1  mrg   return result;
   4510  1.1  mrg }
   4511  1.1  mrg 
   4512  1.1  mrg /* Creates a new buffer with enough space to hold the uncommitted
   4513  1.1  mrg    remaining bytes of BUFF, and at least MIN_EXTRA more bytes.  Copies
   4514  1.1  mrg    the excess bytes to the new buffer.  Chains the new buffer after
   4515  1.1  mrg    BUFF, and returns the new buffer.  */
   4516  1.1  mrg _cpp_buff *
   4517  1.1  mrg _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
   4518  1.1  mrg {
   4519  1.1  mrg   size_t size = EXTENDED_BUFF_SIZE (buff, min_extra);
   4520  1.1  mrg   _cpp_buff *new_buff = _cpp_get_buff (pfile, size);
   4521  1.1  mrg 
   4522  1.1  mrg   buff->next = new_buff;
   4523  1.1  mrg   memcpy (new_buff->base, buff->cur, BUFF_ROOM (buff));
   4524  1.1  mrg   return new_buff;
   4525  1.1  mrg }
   4526  1.1  mrg 
   4527  1.1  mrg /* Creates a new buffer with enough space to hold the uncommitted
   4528  1.1  mrg    remaining bytes of the buffer pointed to by BUFF, and at least
   4529  1.1  mrg    MIN_EXTRA more bytes.  Copies the excess bytes to the new buffer.
   4530  1.1  mrg    Chains the new buffer before the buffer pointed to by BUFF, and
   4531  1.1  mrg    updates the pointer to point to the new buffer.  */
   4532  1.1  mrg void
   4533  1.1  mrg _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
   4534  1.1  mrg {
   4535  1.1  mrg   _cpp_buff *new_buff, *old_buff = *pbuff;
   4536  1.1  mrg   size_t size = EXTENDED_BUFF_SIZE (old_buff, min_extra);
   4537  1.1  mrg 
   4538  1.1  mrg   new_buff = _cpp_get_buff (pfile, size);
   4539  1.1  mrg   memcpy (new_buff->base, old_buff->cur, BUFF_ROOM (old_buff));
   4540  1.1  mrg   new_buff->next = old_buff;
   4541  1.1  mrg   *pbuff = new_buff;
   4542  1.1  mrg }
   4543  1.1  mrg 
   4544  1.1  mrg /* Free a chain of buffers starting at BUFF.  */
   4545  1.1  mrg void
   4546  1.1  mrg _cpp_free_buff (_cpp_buff *buff)
   4547  1.1  mrg {
   4548  1.1  mrg   _cpp_buff *next;
   4549  1.1  mrg 
   4550  1.1  mrg   for (; buff; buff = next)
   4551  1.1  mrg     {
   4552  1.1  mrg       next = buff->next;
   4553  1.1  mrg #ifdef ENABLE_VALGRIND_ANNOTATIONS
   4554  1.1  mrg       free (buff);
   4555  1.1  mrg #else
   4556  1.1  mrg       free (buff->base);
   4557  1.1  mrg #endif
   4558  1.1  mrg     }
   4559  1.1  mrg }
   4560  1.1  mrg 
   4561  1.1  mrg /* Allocate permanent, unaligned storage of length LEN.  */
   4562  1.1  mrg unsigned char *
   4563  1.1  mrg _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
   4564  1.1  mrg {
   4565  1.1  mrg   _cpp_buff *buff = pfile->u_buff;
   4566  1.1  mrg   unsigned char *result = buff->cur;
   4567  1.1  mrg 
   4568  1.1  mrg   if (len > (size_t) (buff->limit - result))
   4569  1.1  mrg     {
   4570  1.1  mrg       buff = _cpp_get_buff (pfile, len);
   4571  1.1  mrg       buff->next = pfile->u_buff;
   4572  1.1  mrg       pfile->u_buff = buff;
   4573  1.1  mrg       result = buff->cur;
   4574  1.1  mrg     }
   4575  1.1  mrg 
   4576  1.1  mrg   buff->cur = result + len;
   4577  1.1  mrg   return result;
   4578  1.1  mrg }
   4579  1.1  mrg 
   4580  1.1  mrg /* Allocate permanent, unaligned storage of length LEN from a_buff.
   4581  1.1  mrg    That buffer is used for growing allocations when saving macro
   4582  1.1  mrg    replacement lists in a #define, and when parsing an answer to an
   4583  1.1  mrg    assertion in #assert, #unassert or #if (and therefore possibly
   4584  1.1  mrg    whilst expanding macros).  It therefore must not be used by any
   4585  1.1  mrg    code that they might call: specifically the lexer and the guts of
   4586  1.1  mrg    the macro expander.
   4587  1.1  mrg 
   4588  1.1  mrg    All existing other uses clearly fit this restriction: storing
   4589  1.1  mrg    registered pragmas during initialization.  */
   4590  1.1  mrg unsigned char *
   4591  1.1  mrg _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
   4592  1.1  mrg {
   4593  1.1  mrg   _cpp_buff *buff = pfile->a_buff;
   4594  1.1  mrg   unsigned char *result = buff->cur;
   4595  1.1  mrg 
   4596  1.1  mrg   if (len > (size_t) (buff->limit - result))
   4597  1.1  mrg     {
   4598  1.1  mrg       buff = _cpp_get_buff (pfile, len);
   4599  1.1  mrg       buff->next = pfile->a_buff;
   4600  1.1  mrg       pfile->a_buff = buff;
   4601  1.1  mrg       result = buff->cur;
   4602  1.1  mrg     }
   4603  1.1  mrg 
   4604  1.1  mrg   buff->cur = result + len;
   4605  1.1  mrg   return result;
   4606  1.1  mrg }
   4607  1.1  mrg 
   4608  1.1  mrg /* Commit or allocate storage from a buffer.  */
   4609  1.1  mrg 
   4610  1.1  mrg void *
   4611  1.1  mrg _cpp_commit_buff (cpp_reader *pfile, size_t size)
   4612  1.1  mrg {
   4613  1.1  mrg   void *ptr = BUFF_FRONT (pfile->a_buff);
   4614  1.1  mrg 
   4615  1.1  mrg   if (pfile->hash_table->alloc_subobject)
   4616  1.1  mrg     {
   4617  1.1  mrg       void *copy = pfile->hash_table->alloc_subobject (size);
   4618  1.1  mrg       memcpy (copy, ptr, size);
   4619  1.1  mrg       ptr = copy;
   4620  1.1  mrg     }
   4621  1.1  mrg   else
   4622  1.1  mrg     BUFF_FRONT (pfile->a_buff) += size;
   4623  1.1  mrg 
   4624  1.1  mrg   return ptr;
   4625  1.1  mrg }
   4626  1.1  mrg 
   4627  1.1  mrg /* Say which field of TOK is in use.  */
   4628  1.1  mrg 
   4629  1.1  mrg enum cpp_token_fld_kind
   4630  1.1  mrg cpp_token_val_index (const cpp_token *tok)
   4631  1.1  mrg {
   4632  1.1  mrg   switch (TOKEN_SPELL (tok))
   4633  1.1  mrg     {
   4634  1.1  mrg     case SPELL_IDENT:
   4635  1.1  mrg       return CPP_TOKEN_FLD_NODE;
   4636  1.1  mrg     case SPELL_LITERAL:
   4637  1.1  mrg       return CPP_TOKEN_FLD_STR;
   4638  1.1  mrg     case SPELL_OPERATOR:
   4639  1.1  mrg       /* Operands which were originally spelled as ident keep around
   4640  1.1  mrg          the node for the exact spelling.  */
   4641  1.1  mrg       if (tok->flags & NAMED_OP)
   4642  1.1  mrg 	return CPP_TOKEN_FLD_NODE;
   4643  1.1  mrg       else if (tok->type == CPP_PASTE)
   4644  1.1  mrg 	return CPP_TOKEN_FLD_TOKEN_NO;
   4645  1.1  mrg       else
   4646  1.1  mrg 	return CPP_TOKEN_FLD_NONE;
   4647  1.1  mrg     case SPELL_NONE:
   4648  1.1  mrg       if (tok->type == CPP_MACRO_ARG)
   4649  1.1  mrg 	return CPP_TOKEN_FLD_ARG_NO;
   4650  1.1  mrg       else if (tok->type == CPP_PADDING)
   4651  1.1  mrg 	return CPP_TOKEN_FLD_SOURCE;
   4652  1.1  mrg       else if (tok->type == CPP_PRAGMA)
   4653  1.1  mrg 	return CPP_TOKEN_FLD_PRAGMA;
   4654  1.1  mrg       /* fall through */
   4655  1.1  mrg     default:
   4656  1.1  mrg       return CPP_TOKEN_FLD_NONE;
   4657  1.1  mrg     }
   4658  1.1  mrg }
   4659  1.1  mrg 
   4660  1.1  mrg /* All tokens lexed in R after calling this function will be forced to
   4661  1.1  mrg    have their location_t to be P, until
   4662  1.1  mrg    cpp_stop_forcing_token_locations is called for R.  */
   4663  1.1  mrg 
   4664  1.1  mrg void
   4665  1.1  mrg cpp_force_token_locations (cpp_reader *r, location_t loc)
   4666  1.1  mrg {
   4667  1.1  mrg   r->forced_token_location = loc;
   4668  1.1  mrg }
   4669  1.1  mrg 
   4670  1.1  mrg /* Go back to assigning locations naturally for lexed tokens.  */
   4671  1.1  mrg 
   4672  1.1  mrg void
   4673  1.1  mrg cpp_stop_forcing_token_locations (cpp_reader *r)
   4674  1.1  mrg {
   4675  1.1  mrg   r->forced_token_location = 0;
   4676  1.1  mrg }
   4677  1.1  mrg 
/* PEEK points at a backslash.  If that backslash escapes an end of
   line ("\n", "\r\n" or a lone "\r"), return a pointer to the first
   character after the line ending; consecutive escaped line endings
   are all skipped.  If the backslash is not followed by a line
   ending, or skipping would reach LIMIT, PEEK itself (as advanced so
   far) is returned.  */

static const unsigned char *
do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
{
  for (;;)
    {
      const unsigned char *after;

      if (__builtin_expect (peek[1] == '\n', true))
        after = peek + 2;
      else if (__builtin_expect (peek[1] == '\r', false))
        /* Step over the '\n' of a CR-LF pair as well.  */
        after = peek + (peek[2] == '\n' ? 3 : 2);
      else
        return peek;

      /* At LIMIT, don't advance.  */
      if (!__builtin_expect (after < limit, true))
        return peek;

      peek = after;

      /* The user might be perverse: another backslash may follow.  */
      if (*peek != '\\')
        return peek;
    }
}
   4707  1.1  mrg 
/* Return the position of the next significant character at or after
   PEEK: if PEEK points at a backslash, any escaped line endings are
   skipped via do_peek_backslash; otherwise PEEK is returned as is.  */
static const unsigned char *
do_peek_next (const unsigned char *peek, const unsigned char *limit)
{
  return (__builtin_expect (*peek == '\\', false)
          ? do_peek_backslash (peek, limit)
          : peek);
}
   4715  1.1  mrg 
/* Step backwards from PEEK and return a pointer to the previous
   character, skipping backwards over escaped line endings (a
   backslash followed by "\n", "\r\n" or a lone "\r").  BOUND is the
   lowest address that may be examined.  Returns NULL if PEEK is
   already at BOUND.  If the previous character is a line ending that
   is not preceded by a backslash, or BOUND is reached while looking
   for one, a pointer to that line-ending character is returned.  */
static const unsigned char *
do_peek_prev (const unsigned char *peek, const unsigned char *bound)
{
  if (peek == bound)
    return NULL;

  unsigned char c = *--peek;
  /* Carriage return is '\r'; comparing against the letter 'r' would
     miss lone-CR line endings and mistake a literal backslash-r for a
     line continuation.  */
  if (__builtin_expect (c == '\n', false)
      || __builtin_expect (c == '\r', false))
    {
      if (peek == bound)
        return peek;

      /* Offset from PEEK of the character that would have to be the
         escaping backslash.  */
      int ix = -1;
      if (c == '\n' && peek[ix] == '\r')
        {
          /* CR-LF pair: the backslash, if any, is one further back.  */
          if (peek + ix == bound)
            return peek;
          ix--;
        }

      if (peek[ix] == '\\')
        /* Escaped line ending: continue from before the backslash.  */
        return do_peek_prev (peek + ix, bound);

      return peek;
    }

  return peek;
}
   4744  1.1  mrg 
/* Check whether the identifier MATCH appears at PEEK - 1.  The first
   character of MATCH is taken as already matched by the caller, so
   comparison starts with MATCH[1] against *PEEK; escaped line endings
   inside the identifier are tolerated.  On success, return a pointer
   to the first character after the identifier and any trailing
   spaces, tabs and escaped line endings.  Return NULL if the text
   differs from MATCH or the identifier continues beyond it.  */

static const unsigned char *
do_peek_ident (const char *match, const unsigned char *peek,
               const unsigned char *limit)
{
  while (*++match)
    {
      if (*peek != *match)
        {
          /* Maybe an escaped line ending is in the way; retry past
             it.  */
          peek = do_peek_next (peek, limit);
          if (*peek != *match)
            return NULL;
        }
      peek++;
    }

  /* Must now not be looking at an identifier char.  */
  peek = do_peek_next (peek, limit);
  if (ISIDNUM (*peek))
    return NULL;

  /* Skip control-line whitespace.  */
  for (;;)
    {
      while (*peek == ' ' || *peek == '\t')
        peek++;

      if (!__builtin_expect (*peek == '\\', false))
        break;

      peek = do_peek_backslash (peek, limit);
      /* Still on a backslash: it did not escape a line ending (or
         the limit was hit), so stop here.  */
      if (*peek == '\\')
        break;
    }

  return peek;
}
   4778  1.1  mrg 
   4779  1.1  mrg /* Are we looking at a module control line starting at PEEK - 1?
                     C is the character at PEEK - 1, which the caller has already
                     read; LIMIT is only handed on to the do_peek_* helpers.
                     Returns true only when C begins "export import",
                     "export module", "import", "__import" or "module" and the
                     first character after the keyword is acceptable for that
                     keyword (see the checks on P below).  PFILE supplies the
                     rliterals and digraphs options consulted by those checks.  */
   4780  1.1  mrg 
   4781  1.1  mrg static bool
   4782  1.1  mrg do_peek_module (cpp_reader *pfile, unsigned char c,
   4783  1.1  mrg 		const unsigned char *peek, const unsigned char *limit)
   4784  1.1  mrg {
                    /* Set for import and __import, clear for module; it decides
                       whether '<', '"' and ';' are acceptable further down.  */
   4785  1.1  mrg   bool import = false;
   4786  1.1  mrg 
   4787  1.1  mrg   if (__builtin_expect (c == 'e', false))
   4788  1.1  mrg     {
                        /* Cheap test of the second character before calling
                           do_peek_ident; a backslash is let through as well
                           (presumably so that an escaped newline inside the keyword
                           is left to do_peek_ident -- confirm).  The result of the
                           assignment is tested, so a null return means no match.  */
   4789  1.1  mrg       if (!((peek[0] == 'x' || peek[0] == '\\')
   4790  1.1  mrg 	    && (peek = do_peek_ident ("export", peek, limit))))
   4791  1.1  mrg 	return false;
   4792  1.1  mrg 
   4793  1.1  mrg       /* export, peek for import or module.  No need to peek __import
   4794  1.1  mrg 	 here.  */
   4795  1.1  mrg       if (peek[0] == 'i')
   4796  1.1  mrg 	{
   4797  1.1  mrg 	  if (!((peek[1] == 'm' || peek[1] == '\\')
   4798  1.1  mrg 		&& (peek = do_peek_ident ("import", peek + 1, limit))))
   4799  1.1  mrg 	    return false;
   4800  1.1  mrg 	  import = true;
   4801  1.1  mrg 	}
   4802  1.1  mrg       else if (peek[0] == 'm')
   4803  1.1  mrg 	{
   4804  1.1  mrg 	  if (!((peek[1] == 'o' || peek[1] == '\\')
   4805  1.1  mrg 		&& (peek = do_peek_ident ("module", peek + 1, limit))))
   4806  1.1  mrg 	    return false;
   4807  1.1  mrg 	}
   4808  1.1  mrg       else
   4809  1.1  mrg 	return false;
   4810  1.1  mrg     }
   4811  1.1  mrg   else if (__builtin_expect (c == 'i', false))
   4812  1.1  mrg     {
   4813  1.1  mrg       if (!((peek[0] == 'm' || peek[0] == '\\')
   4814  1.1  mrg 	    && (peek = do_peek_ident ("import", peek, limit))))
   4815  1.1  mrg 	return false;
   4816  1.1  mrg       import = true;
   4817  1.1  mrg     }
   4818  1.1  mrg   else if (__builtin_expect (c == '_', false))
   4819  1.1  mrg     {
   4820  1.1  mrg       /* Needed for translated includes.   */
   4821  1.1  mrg       if (!((peek[0] == '_' || peek[0] == '\\')
   4822  1.1  mrg 	    && (peek = do_peek_ident ("__import", peek, limit))))
   4823  1.1  mrg 	return false;
   4824  1.1  mrg       import = true;
   4825  1.1  mrg     }
   4826  1.1  mrg   else if (__builtin_expect (c == 'm', false))
   4827  1.1  mrg     {
   4828  1.1  mrg       if (!((peek[0] == 'o' || peek[0] == '\\')
   4829  1.1  mrg 	    && (peek = do_peek_ident ("module", peek, limit))))
   4830  1.1  mrg 	return false;
   4831  1.1  mrg     }
   4832  1.1  mrg   else
   4833  1.1  mrg     return false;
   4834  1.1  mrg 
   4835  1.1  mrg   /* Peek the next character to see if it's good enough.  We'll be at
   4836  1.1  mrg      the first non-whitespace char, including skipping an escaped
   4837  1.1  mrg      newline.  */
   4838  1.1  mrg   /* ... import followed by identifier, ':', '<' or header-name
   4839  1.1  mrg      preprocessing tokens, or module followed by identifier, ':' or
   4840  1.1  mrg      ';' preprocessing tokens.  */
   4841  1.1  mrg   unsigned char p = *peek++;
   4842  1.1  mrg 
   4843  1.1  mrg   /* A character literal is ... single quotes, ... optionally preceded
   4844  1.1  mrg      by u8, u, U, or L */
   4845  1.1  mrg   /* A string-literal is a ... double quotes, optionally prefixed by
   4846  1.1  mrg      R, u8, u8R, u, uR, U, UR, L, or LR */
                    /* The branches below reject the keyword when what follows is
                       such a prefixed literal rather than an identifier.  */
   4847  1.1  mrg   if (p == 'u')
   4848  1.1  mrg     {
   4849  1.1  mrg       peek = do_peek_next (peek, limit);
   4850  1.1  mrg       if (*peek == '8')
   4851  1.1  mrg 	{
                            /* u8 prefix: step over the '8' and continue as for U/L.  */
   4852  1.1  mrg 	  peek++;
   4853  1.1  mrg 	  goto peek_u8;
   4854  1.1  mrg 	}
   4855  1.1  mrg       goto peek_u;
   4856  1.1  mrg     }
   4857  1.1  mrg   else if (p == 'U' || p == 'L')
   4858  1.1  mrg     {
   4859  1.1  mrg     peek_u8:
   4860  1.1  mrg       peek = do_peek_next (peek, limit);
   4861  1.1  mrg     peek_u:
                        /* A quote directly after the encoding prefix makes this a
                           character or string literal, not an identifier.  */
   4862  1.1  mrg       if (*peek == '\"' || *peek == '\'')
   4863  1.1  mrg 	return false;
   4864  1.1  mrg 
   4865  1.1  mrg       if (*peek == 'R')
   4866  1.1  mrg 	goto peek_R;
   4867  1.1  mrg       /* Identifier. Ok.  */
   4868  1.1  mrg     }
   4869  1.1  mrg   else if (p == 'R')
   4870  1.1  mrg     {
   4871  1.1  mrg     peek_R:
                        /* NOTE(review): on the direct p == 'R' path PEEK has already
                           been stepped past the 'R', whereas on the goto from peek_u
                           PEEK still addresses the 'R' itself.  Confirm that
                           do_peek_next makes the '"' test below work for both.  */
   4872  1.1  mrg       if (CPP_OPTION (pfile, rliterals))
   4873  1.1  mrg 	{
   4874  1.1  mrg 	  peek = do_peek_next (peek, limit);
   4875  1.1  mrg 	  if (*peek == '\"')
   4876  1.1  mrg 	    return false;
   4877  1.1  mrg 	}
   4878  1.1  mrg       /* Identifier. Ok.  */
   4879  1.1  mrg     }
   4880  1.1  mrg   else if ('Z' - 'A' == 25
   4881  1.1  mrg 	   ? ((p >= 'A' && p <= 'Z') || (p >= 'a' && p <= 'z') || p == '_')
   4882  1.1  mrg 	   : ISIDST (p))
   4883  1.1  mrg     {
   4884  1.1  mrg       /* Identifier.  Ok.  The condition uses plain range comparisons
                           when 'Z' - 'A' == 25 (a constant expression, presumably a
                           check for a contiguous alphabet) and ISIDST otherwise.  */
   4885  1.1  mrg     }
   4886  1.1  mrg   else if (p == '<')
   4887  1.1  mrg     {
   4888  1.1  mrg       /* Maybe angle header, ok for import.  Reject
   4889  1.1  mrg 	 '<=', '<<' digraph:'<:'.  */
   4890  1.1  mrg       if (!import)
   4891  1.1  mrg 	return false;
   4892  1.1  mrg       peek = do_peek_next (peek, limit);
   4893  1.1  mrg       if (*peek == '=' || *peek == '<'
   4894  1.1  mrg 	  || (*peek == ':' && CPP_OPTION (pfile, digraphs)))
   4895  1.1  mrg 	return false;
   4896  1.1  mrg     }
   4897  1.1  mrg   else if (p == ';')
   4898  1.1  mrg     {
   4899  1.1  mrg       /* SEMICOLON, ok for module.  */
   4900  1.1  mrg       if (import)
   4901  1.1  mrg 	return false;
   4902  1.1  mrg     }
   4903  1.1  mrg   else if (p == '"')
   4904  1.1  mrg     {
   4905  1.1  mrg       /* STRING, ok for import.  */
   4906  1.1  mrg       if (!import)
   4907  1.1  mrg 	return false;
   4908  1.1  mrg     }
   4909  1.1  mrg   else if (p == ':')
   4910  1.1  mrg     {
   4911  1.1  mrg       /* Maybe COLON, ok.  Reject '::', digraph:':>'.  */
   4912  1.1  mrg       peek = do_peek_next (peek, limit);
   4913  1.1  mrg       if (*peek == ':' || (*peek == '>' && CPP_OPTION (pfile, digraphs)))
   4914  1.1  mrg 	return false;
   4915  1.1  mrg     }
   4916  1.1  mrg   else
   4917  1.1  mrg     /* FIXME: Detect a unicode character, excluding those not
   4918  1.1  mrg        permitted as the initial character. [lex.name]/1.  I presume
   4919  1.1  mrg        we need to check the \[uU] spellings, and directly using
   4920  1.1  mrg        Unicode in say UTF8 form?  Or perhaps we do the phase-1
   4921  1.1  mrg        conversion of UTF8 to universal-character-names?  */
   4922  1.1  mrg     return false;
   4923  1.1  mrg 
   4924  1.1  mrg   return true;
   4925  1.1  mrg }
   4926  1.1  mrg 
   4927  1.1  mrg /* Directives-only scanning.  Somewhat more relaxed than correct
   4928  1.1  mrg    parsing -- some ill-formed programs will not be rejected.

                     Scans the characters of PFILE's current buffer, popping it when
                     exhausted, and repeats until no buffer remains.  CB is called
                     with PFILE, a task code, DATA, and task-specific arguments:
                       CPP_DO_print     line count, start pointer and length of a
                                        run of scanned text;
                       CPP_DO_location  the line table's highest_line, after a
                                        directive or module control line;
                       CPP_DO_token     a token and its spelling location, for
                                        each token of a module control line.
                     A '#' at the beginning of a line is handed to
                     _cpp_handle_directive.  */
   4929  1.1  mrg 
   4930  1.1  mrg void
   4931  1.1  mrg cpp_directive_only_process (cpp_reader *pfile,
   4932  1.1  mrg 			    void *data,
   4933  1.1  mrg 			    void (*cb) (cpp_reader *, CPP_DO_task, void *, ...))
   4934  1.1  mrg {
   4935  1.1  mrg   bool module_p = CPP_OPTION (pfile, module_directives);
   4936  1.1  mrg 
   4937  1.1  mrg   do
   4938  1.1  mrg     {
   4939  1.1  mrg     restart:
   4940  1.1  mrg       /* Buffer initialization, but no line cleaning. */
                        /* pfile->buffer is re-read each time: this label is re-entered
                           after directive and module control line handling.  */
   4941  1.1  mrg       cpp_buffer *buffer = pfile->buffer;
   4942  1.1  mrg       buffer->cur_note = buffer->notes_used = 0;
   4943  1.1  mrg       buffer->cur = buffer->line_base = buffer->next_line;
   4944  1.1  mrg       buffer->need_line = false;
   4945  1.1  mrg       /* Files always end in a newline or carriage return.  We rely on this for
   4946  1.1  mrg 	 character peeking safety.  */
   4947  1.1  mrg       gcc_assert (buffer->rlimit[0] == '\n' || buffer->rlimit[0] == '\r');
   4948  1.1  mrg 
                        /* Scanner state:
                           BASE        start of the text not yet passed to CB;
                           LINE_COUNT  line ends seen since BASE;
                           LINE_START  start of the current physical line;
                           BOL         set at each unescaped line end and cleared at
                                       dflt, i.e. by anything other than whitespace
                                       and comments;
                           RAW         set by the backward scan at quote_peek when a
                                       '"' is preceded by a raw-string R;
                           LWM         lower bound for the backward scans; moved
                                       forward after each comment and literal.  */
   4949  1.1  mrg       const unsigned char *base = buffer->cur;
   4950  1.1  mrg       unsigned line_count = 0;
   4951  1.1  mrg       const unsigned char *line_start = base;
   4952  1.1  mrg 
   4953  1.1  mrg       bool bol = true;
   4954  1.1  mrg       bool raw = false;
   4955  1.1  mrg 
   4956  1.1  mrg       const unsigned char *lwm = base;
   4957  1.1  mrg       for (const unsigned char *pos = base, *limit = buffer->rlimit;
   4958  1.1  mrg 	   pos < limit;)
   4959  1.1  mrg 	{
   4960  1.1  mrg 	  unsigned char c = *pos++;
   4961  1.1  mrg 	  /* This matches the switch in _cpp_lex_direct.  */
   4962  1.1  mrg 	  switch (c)
   4963  1.1  mrg 	    {
   4964  1.1  mrg 	    case ' ': case '\t': case '\f': case '\v':
   4965  1.1  mrg 	      /* Whitespace, do nothing.  */
   4966  1.1  mrg 	      break;
   4967  1.1  mrg 
   4968  1.1  mrg 	    case '\r': /* MAC line ending, or Windows \r\n  */
   4969  1.1  mrg 	      if (*pos == '\n')
   4970  1.1  mrg 		pos++;
   4971  1.1  mrg 	      /* FALLTHROUGH */
   4972  1.1  mrg 
   4973  1.1  mrg 	    case '\n':
   4974  1.1  mrg 	      bol = true;
   4975  1.1  mrg 
                                /* Also entered from the '\\' case, which counts the line
                                   without setting BOL.  */
   4976  1.1  mrg 	    next_line:
   4977  1.1  mrg 	      CPP_INCREMENT_LINE (pfile, 0);
   4978  1.1  mrg 	      line_count++;
   4979  1.1  mrg 	      line_start = pos;
   4980  1.1  mrg 	      break;
   4981  1.1  mrg 
   4982  1.1  mrg 	    case '\\':
   4983  1.1  mrg 	      /* <backslash><newline> is removed, and doesn't undo any
   4984  1.1  mrg 		 preceding escape or whatnot.  */
   4985  1.1  mrg 	      if (*pos == '\n')
   4986  1.1  mrg 		{
   4987  1.1  mrg 		  pos++;
   4988  1.1  mrg 		  goto next_line;
   4989  1.1  mrg 		}
   4990  1.1  mrg 	      else if (*pos == '\r')
   4991  1.1  mrg 		{
   4992  1.1  mrg 		  if (pos[1] == '\n')
   4993  1.1  mrg 		    pos++;
   4994  1.1  mrg 		  pos++;
   4995  1.1  mrg 		  goto next_line;
   4996  1.1  mrg 		}
   4997  1.1  mrg 	      goto dflt;
   4998  1.1  mrg 
   4999  1.1  mrg 	    case '#':
   5000  1.1  mrg 	      if (bol)
   5001  1.1  mrg 		{
   5002  1.1  mrg 		  /* Line directive.  */
                                    /* Pass on the text scanned before the '#' first.  */
   5003  1.1  mrg 		  if (pos - 1 > base && !pfile->state.skipping)
   5004  1.1  mrg 		    cb (pfile, CPP_DO_print, data,
   5005  1.1  mrg 			line_count, base, pos - 1 - base);
   5006  1.1  mrg 
   5007  1.1  mrg 		  /* Prep things for directive handling. */
   5008  1.1  mrg 		  buffer->next_line = pos;
   5009  1.1  mrg 		  buffer->need_line = true;
   5010  1.1  mrg 		  bool ok = _cpp_get_fresh_line (pfile);
   5011  1.1  mrg 		  gcc_checking_assert (ok);
   5012  1.1  mrg 
   5013  1.1  mrg 		  /* Ensure proper column numbering for generated
   5014  1.1  mrg 		     error messages. */
   5015  1.1  mrg 		  buffer->line_base -= pos - line_start;
   5016  1.1  mrg 
                                    /* The second argument is true when the '#' is not the
                                       first character of its line.  */
   5017  1.1  mrg 		  _cpp_handle_directive (pfile, line_start + 1 != pos);
   5018  1.1  mrg 
   5019  1.1  mrg 		  /* Sanitize the line settings.  Duplicate #include's can
   5020  1.1  mrg 		     mess things up. */
   5021  1.1  mrg 		  // FIXME: Necessary?
   5022  1.1  mrg 		  pfile->line_table->highest_location
   5023  1.1  mrg 		    = pfile->line_table->highest_line;
   5024  1.1  mrg 
   5025  1.1  mrg 		  if (!pfile->state.skipping
   5026  1.1  mrg 		      && pfile->buffer->next_line < pfile->buffer->rlimit)
   5027  1.1  mrg 		    cb (pfile, CPP_DO_location, data,
   5028  1.1  mrg 			pfile->line_table->highest_line);
   5029  1.1  mrg 
   5030  1.1  mrg 		  goto restart;
   5031  1.1  mrg 		}
   5032  1.1  mrg 	      goto dflt;
   5033  1.1  mrg 
   5034  1.1  mrg 	    case '/':
   5035  1.1  mrg 	      {
   5036  1.1  mrg 		const unsigned char *peek = do_peek_next (pos, limit);
   5037  1.1  mrg 		if (!(*peek == '/' || *peek == '*'))
   5038  1.1  mrg 		  goto dflt;
   5039  1.1  mrg 
   5040  1.1  mrg 		/* Line or block comment  */
                                  /* STAR records that the previous character was a '*' inside
                                     a block comment; ESC records that it was a backslash, so
                                     that an escaped line end does not finish a line comment.  */
   5041  1.1  mrg 		bool is_block = *peek == '*';
   5042  1.1  mrg 		bool star = false;
   5043  1.1  mrg 		bool esc = false;
   5044  1.1  mrg 		location_t sloc
   5045  1.1  mrg 		  = linemap_position_for_column (pfile->line_table,
   5046  1.1  mrg 						 pos - line_start);
   5047  1.1  mrg 
   5048  1.1  mrg 		while (pos < limit)
   5049  1.1  mrg 		  {
   5050  1.1  mrg 		    char c = *pos++;
   5051  1.1  mrg 		    switch (c)
   5052  1.1  mrg 		      {
   5053  1.1  mrg 		      case '\\':
   5054  1.1  mrg 			esc = true;
   5055  1.1  mrg 			break;
   5056  1.1  mrg 
   5057  1.1  mrg 		      case '\r':
   5058  1.1  mrg 			if (*pos == '\n')
   5059  1.1  mrg 			  pos++;
   5060  1.1  mrg 			/* FALLTHROUGH  */
   5061  1.1  mrg 
   5062  1.1  mrg 		      case '\n':
   5063  1.1  mrg 			{
   5064  1.1  mrg 			  CPP_INCREMENT_LINE (pfile, 0);
   5065  1.1  mrg 			  line_count++;
   5066  1.1  mrg 			  line_start = pos;
                                            /* An unescaped line end finishes a line comment.  */
   5067  1.1  mrg 			  if (!esc && !is_block)
   5068  1.1  mrg 			    {
   5069  1.1  mrg 			      bol = true;
   5070  1.1  mrg 			      goto done_comment;
   5071  1.1  mrg 			    }
   5072  1.1  mrg 			}
   5073  1.1  mrg 			if (!esc)
   5074  1.1  mrg 			  star = false;
   5075  1.1  mrg 			esc = false;
   5076  1.1  mrg 			break;
   5077  1.1  mrg 
   5078  1.1  mrg 		      case '*':
                                          /* NOTE(review): POS has already been advanced past C
                                             here, so this test also holds for the '*' at PEEK
                                             that opens the comment.  Confirm whether "/*/" is
                                             meant to be accepted as a complete comment.  */
   5079  1.1  mrg 			if (pos > peek)
   5080  1.1  mrg 			  star = is_block;
   5081  1.1  mrg 			esc = false;
   5082  1.1  mrg 			break;
   5083  1.1  mrg 
   5084  1.1  mrg 		      case '/':
   5085  1.1  mrg 			if (star)
   5086  1.1  mrg 			  goto done_comment;
   5087  1.1  mrg 			/* FALLTHROUGH  */
   5088  1.1  mrg 
   5089  1.1  mrg 		      default:
   5090  1.1  mrg 			star = false;
   5091  1.1  mrg 			esc = false;
   5092  1.1  mrg 			break;
   5093  1.1  mrg 		      }
   5094  1.1  mrg 		  }
                                  /* The loop above only falls out here once POS has reached
                                     LIMIT, i.e. the buffer ended inside the comment.  */
   5095  1.1  mrg 		if (pos < limit || is_block)
   5096  1.1  mrg 		  cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
   5097  1.1  mrg 				       "unterminated comment");
   5098  1.1  mrg 	      done_comment:
   5099  1.1  mrg 		lwm = pos;
   5100  1.1  mrg 		break;
   5101  1.1  mrg 	      }
   5102  1.1  mrg 
   5103  1.1  mrg 	    case '\'':
   5104  1.1  mrg 	      if (!CPP_OPTION (pfile, digit_separators))
   5105  1.1  mrg 		goto delimited_string;
   5106  1.1  mrg 
   5107  1.1  mrg 	      /* Possibly a number punctuator.  */
   5108  1.1  mrg 	      if (!ISIDNUM (*do_peek_next (pos, limit)))
   5109  1.1  mrg 		goto delimited_string;
   5110  1.1  mrg 
   5111  1.1  mrg 	      goto quote_peek;
   5112  1.1  mrg 
   5113  1.1  mrg 	    case '\"':
   5114  1.1  mrg 	      if (!CPP_OPTION (pfile, rliterals))
   5115  1.1  mrg 		goto delimited_string;
   5116  1.1  mrg 
   5117  1.1  mrg 	    quote_peek:
   5118  1.1  mrg 	      {
   5119  1.1  mrg 		/* For ' see if it's a number punctuator
   5120  1.1  mrg 		   \.?<digit>(<digit>|<identifier-nondigit>
   5121  1.1  mrg 		   |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
   5122  1.1  mrg 		/* For " see if it's a raw string
   5123  1.1  mrg 		   {U,L,u,u8}R.  This includes CPP_NUMBER detection,
   5124  1.1  mrg 		   because that could be 0e+R.  */
   5125  1.1  mrg 		const unsigned char *peek = pos - 1;
   5126  1.1  mrg 		bool quote_first = c == '"';
   5127  1.1  mrg 		bool quote_eight = false;
   5128  1.1  mrg 		bool maybe_number_start = false;
   5129  1.1  mrg 		bool want_number = false;
   5130  1.1  mrg 
                                  /* Walk backwards from the quote.  The loop ends when
                                     do_peek_prev returns null (presumably once LWM has been
                                     reached -- confirm) or on one of the breaks below.  */
   5131  1.1  mrg 		while ((peek = do_peek_prev (peek, lwm)))
   5132  1.1  mrg 		  {
   5133  1.1  mrg 		    unsigned char p = *peek;
   5134  1.1  mrg 		    if (quote_first)
   5135  1.1  mrg 		      {
   5136  1.1  mrg 			if (!raw)
   5137  1.1  mrg 			  {
   5138  1.1  mrg 			    if (p != 'R')
   5139  1.1  mrg 			      break;
   5140  1.1  mrg 			    raw = true;
   5141  1.1  mrg 			    continue;
   5142  1.1  mrg 			  }
   5143  1.1  mrg 
   5144  1.1  mrg 			quote_first = false;
   5145  1.1  mrg 			if (p == 'L' || p == 'U' || p == 'u')
   5146  1.1  mrg 			  ;
   5147  1.1  mrg 			else if (p == '8')
   5148  1.1  mrg 			  quote_eight = true;
   5149  1.1  mrg 			else
   5150  1.1  mrg 			  goto second_raw;
   5151  1.1  mrg 		      }
   5152  1.1  mrg 		    else if (quote_eight)
   5153  1.1  mrg 		      {
   5154  1.1  mrg 			if (p != 'u')
   5155  1.1  mrg 			  {
   5156  1.1  mrg 			    raw = false;
   5157  1.1  mrg 			    break;
   5158  1.1  mrg 			  }
   5159  1.1  mrg 			quote_eight = false;
   5160  1.1  mrg 		      }
   5161  1.1  mrg 		    else if (c == '"')
   5162  1.1  mrg 		      {
   5163  1.1  mrg 		      second_raw:;
   5164  1.1  mrg 			if (!want_number && ISIDNUM (p))
   5165  1.1  mrg 			  {
   5166  1.1  mrg 			    raw = false;
   5167  1.1  mrg 			    break;
   5168  1.1  mrg 			  }
   5169  1.1  mrg 		      }
   5170  1.1  mrg 
   5171  1.1  mrg 		    if (ISDIGIT (p))
   5172  1.1  mrg 		      maybe_number_start = true;
   5173  1.1  mrg 		    else if (p == '.')
   5174  1.1  mrg 		      want_number = true;
   5175  1.1  mrg 		    else if (ISIDNUM (p))
   5176  1.1  mrg 		      maybe_number_start = false;
   5177  1.1  mrg 		    else if (p == '+' || p == '-')
   5178  1.1  mrg 		      {
                                          /* A sign only continues a number when it follows an
                                             exponent letter.  */
   5179  1.1  mrg 			if (const unsigned char *peek_prev
   5180  1.1  mrg 			    = do_peek_prev (peek, lwm))
   5181  1.1  mrg 			  {
   5182  1.1  mrg 			    p = *peek_prev;
   5183  1.1  mrg 			    if (p == 'e' || p == 'E'
   5184  1.1  mrg 				|| p == 'p' || p == 'P')
   5185  1.1  mrg 			      {
   5186  1.1  mrg 				want_number = true;
   5187  1.1  mrg 				maybe_number_start = false;
   5188  1.1  mrg 			      }
   5189  1.1  mrg 			    else
   5190  1.1  mrg 			      break;
   5191  1.1  mrg 			  }
   5192  1.1  mrg 			else
   5193  1.1  mrg 			  break;
   5194  1.1  mrg 		      }
   5195  1.1  mrg 		    else if (p == '\'' || p == '\"')
   5196  1.1  mrg 		      {
   5197  1.1  mrg 			/* If this is lwm, this must be the end of a
   5198  1.1  mrg 			   previous string.  So this is a trailing
   5199  1.1  mrg 			   literal type, (a) if those are allowed,
   5200  1.1  mrg 			     and (b) maybe_start is false.  Otherwise
   5201  1.1  mrg 			     this must be a CPP_NUMBER because we've
   5202  1.1  mrg 			     met another ', and we'd have checked that
   5203  1.1  mrg 			     in its own right.  */
   5204  1.1  mrg 			if (peek == lwm && CPP_OPTION (pfile, uliterals))
   5205  1.1  mrg 			  {
   5206  1.1  mrg 			    if  (!maybe_number_start && !want_number)
   5207  1.1  mrg 			      /* Must be a literal type.  */
   5208  1.1  mrg 			      raw = false;
   5209  1.1  mrg 			  }
   5210  1.1  mrg 			else if (p == '\''
   5211  1.1  mrg 				 && CPP_OPTION (pfile, digit_separators))
   5212  1.1  mrg 			  maybe_number_start = true;
   5213  1.1  mrg 			break;
   5214  1.1  mrg 		      }
   5215  1.1  mrg 		    else if (c == '\'')
   5216  1.1  mrg 		      break;
   5217  1.1  mrg 		    else if (!quote_first && !quote_eight)
   5218  1.1  mrg 		      break;
   5219  1.1  mrg 		  }
   5220  1.1  mrg 
                                  /* The quote sits inside a number: a ' is then a digit
                                     separator rather than a literal, and a " cannot be a raw
                                     string.  */
   5221  1.1  mrg 		if (maybe_number_start)
   5222  1.1  mrg 		  {
   5223  1.1  mrg 		    if (c == '\'')
   5224  1.1  mrg 		      /* A CPP NUMBER.  */
   5225  1.1  mrg 		      goto dflt;
   5226  1.1  mrg 		    raw = false;
   5227  1.1  mrg 		  }
   5228  1.1  mrg 
   5229  1.1  mrg 		goto delimited_string;
   5230  1.1  mrg 	      }
   5231  1.1  mrg 
   5232  1.1  mrg 	    delimited_string:
   5233  1.1  mrg 	      {
   5234  1.1  mrg 		/* (Possibly raw) string or char literal.  */
                                  /* END is the quote character that closes the literal.  DELIM
                                     and DELIM_LEN describe the raw-string delimiter and stay
                                     NULL and -1 for ordinary literals.  ESC counts the
                                     backslashes immediately preceding the current character.  */
   5235  1.1  mrg 		unsigned char end = c;
   5236  1.1  mrg 		int delim_len = -1;
   5237  1.1  mrg 		const unsigned char *delim = NULL;
   5238  1.1  mrg 		location_t sloc = linemap_position_for_column (pfile->line_table,
   5239  1.1  mrg 							       pos - line_start);
   5240  1.1  mrg 		int esc = 0;
   5241  1.1  mrg 
   5242  1.1  mrg 		if (raw)
   5243  1.1  mrg 		  {
   5244  1.1  mrg 		    /* There can be no line breaks in the delimiter.  */
                                      /* On either error below, raw handling is abandoned and the
                                         text is rescanned from DELIM as an ordinary literal.  */
   5245  1.1  mrg 		    delim = pos;
   5246  1.1  mrg 		    for (delim_len = 0; (c = *pos++) != '('; delim_len++)
   5247  1.1  mrg 		      {
   5248  1.1  mrg 			if (delim_len == 16)
   5249  1.1  mrg 			  {
   5250  1.1  mrg 			    cpp_error_with_line (pfile, CPP_DL_ERROR,
   5251  1.1  mrg 						 sloc, 0,
   5252  1.1  mrg 						 "raw string delimiter"
   5253  1.1  mrg 						 " longer than %d"
   5254  1.1  mrg 						 " characters",
   5255  1.1  mrg 						 delim_len);
   5256  1.1  mrg 			    raw = false;
   5257  1.1  mrg 			    pos = delim;
   5258  1.1  mrg 			    break;
   5259  1.1  mrg 			  }
   5260  1.1  mrg 			if (strchr (") \\\t\v\f\n", c))
   5261  1.1  mrg 			  {
   5262  1.1  mrg 			    cpp_error_with_line (pfile, CPP_DL_ERROR,
   5263  1.1  mrg 						 sloc, 0,
   5264  1.1  mrg 						 "invalid character '%c'"
   5265  1.1  mrg 						 " in raw string"
   5266  1.1  mrg 						 " delimiter", c);
   5267  1.1  mrg 			    raw = false;
   5268  1.1  mrg 			    pos = delim;
   5269  1.1  mrg 			    break;
   5270  1.1  mrg 			  }
   5271  1.1  mrg 			if (pos >= limit)
   5272  1.1  mrg 			  goto bad_string;
   5273  1.1  mrg 		      }
   5274  1.1  mrg 		  }
   5275  1.1  mrg 
   5276  1.1  mrg 		while (pos < limit)
   5277  1.1  mrg 		  {
   5278  1.1  mrg 		    char c = *pos++;
   5279  1.1  mrg 		    switch (c)
   5280  1.1  mrg 		      {
   5281  1.1  mrg 		      case '\\':
                                          /* Backslashes are not escapes inside a raw string.  */
   5282  1.1  mrg 			if (!raw)
   5283  1.1  mrg 			  esc++;
   5284  1.1  mrg 			break;
   5285  1.1  mrg 
   5286  1.1  mrg 		      case '\r':
   5287  1.1  mrg 			if (*pos == '\n')
   5288  1.1  mrg 			  pos++;
   5289  1.1  mrg 			/* FALLTHROUGH  */
   5290  1.1  mrg 
   5291  1.1  mrg 		      case '\n':
   5292  1.1  mrg 			{
   5293  1.1  mrg 			  CPP_INCREMENT_LINE (pfile, 0);
   5294  1.1  mrg 			  line_count++;
   5295  1.1  mrg 			  line_start = pos;
   5296  1.1  mrg 			}
                                          /* A line end uses up one pending backslash.  */
   5297  1.1  mrg 			if (esc)
   5298  1.1  mrg 			  esc--;
   5299  1.1  mrg 			break;
   5300  1.1  mrg 
   5301  1.1  mrg 		      case ')':
                                          /* A raw string ends at ')' followed by the delimiter
                                             and the closing quote.  */
   5302  1.1  mrg 			if (raw
   5303  1.1  mrg 			    && pos + delim_len + 1 < limit
   5304  1.1  mrg 			    && pos[delim_len] == end
   5305  1.1  mrg 			    && !memcmp (delim, pos, delim_len))
   5306  1.1  mrg 			  {
   5307  1.1  mrg 			    pos += delim_len + 1;
   5308  1.1  mrg 			    raw = false;
   5309  1.1  mrg 			    goto done_string;
   5310  1.1  mrg 			  }
   5311  1.1  mrg 			break;
   5312  1.1  mrg 
   5313  1.1  mrg 		      default:
                                          /* The closing quote only counts when preceded by an
                                             even number of backslashes.  */
   5314  1.1  mrg 			if (!raw && !(esc & 1) && c == end)
   5315  1.1  mrg 			  goto done_string;
   5316  1.1  mrg 			esc = 0;
   5317  1.1  mrg 			break;
   5318  1.1  mrg 		      }
   5319  1.1  mrg 		  }
   5320  1.1  mrg 	      bad_string:
   5321  1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
   5322  1.1  mrg 				     "unterminated literal");
   5323  1.1  mrg 
   5324  1.1  mrg 	      done_string:
                                  /* LWM is left on the last character consumed, so a later
                                     backward scan can see the closing quote.  */
   5325  1.1  mrg 		raw = false;
   5326  1.1  mrg 		lwm = pos - 1;
   5327  1.1  mrg 	      }
   5328  1.1  mrg 	      goto dflt;
   5329  1.1  mrg 
   5330  1.1  mrg 	    case '_':
   5331  1.1  mrg 	    case 'e':
   5332  1.1  mrg 	    case 'i':
   5333  1.1  mrg 	    case 'm':
   5334  1.1  mrg 	      if (bol && module_p && !pfile->state.skipping
   5335  1.1  mrg 		  && do_peek_module (pfile, c, pos, limit))
   5336  1.1  mrg 		{
   5337  1.1  mrg 		  /* We've seen the start of a module control line.
   5338  1.1  mrg 		     Start up the tokenizer.  */
   5339  1.1  mrg 		  pos--; /* Backup over the first character.  */
   5340  1.1  mrg 
   5341  1.1  mrg 		  /* Backup over whitespace to start of line.  */
   5342  1.1  mrg 		  while (pos > line_start
   5343  1.1  mrg 			 && (pos[-1] == ' ' || pos[-1] == '\t'))
   5344  1.1  mrg 		    pos--;
   5345  1.1  mrg 
   5346  1.1  mrg 		  if (pos > base)
   5347  1.1  mrg 		    cb (pfile, CPP_DO_print, data, line_count, base, pos - base);
   5348  1.1  mrg 
   5349  1.1  mrg 		  /* Prep things for directive handling. */
   5350  1.1  mrg 		  buffer->next_line = pos;
   5351  1.1  mrg 		  buffer->need_line = true;
   5352  1.1  mrg 
   5353  1.1  mrg 		  /* Now get tokens until the PRAGMA_EOL.  */
   5354  1.1  mrg 		  do
   5355  1.1  mrg 		    {
   5356  1.1  mrg 		      location_t spelling;
   5357  1.1  mrg 		      const cpp_token *tok
   5358  1.1  mrg 			= cpp_get_token_with_location (pfile, &spelling);
   5359  1.1  mrg 
   5360  1.1  mrg 		      gcc_assert (pfile->state.in_deferred_pragma
   5361  1.1  mrg 				  || tok->type == CPP_PRAGMA_EOL);
   5362  1.1  mrg 		      cb (pfile, CPP_DO_token, data, tok, spelling);
   5363  1.1  mrg 		    }
   5364  1.1  mrg 		  while (pfile->state.in_deferred_pragma);
   5365  1.1  mrg 
   5366  1.1  mrg 		  if (pfile->buffer->next_line < pfile->buffer->rlimit)
   5367  1.1  mrg 		    cb (pfile, CPP_DO_location, data,
   5368  1.1  mrg 			pfile->line_table->highest_line);
   5369  1.1  mrg 
   5370  1.1  mrg 		  pfile->mi_valid = false;
   5371  1.1  mrg 		  goto restart;
   5372  1.1  mrg 		}
   5373  1.1  mrg 	      goto dflt;
   5374  1.1  mrg 
   5375  1.1  mrg 	    default:
   5376  1.1  mrg 	    dflt:
                                /* Any other character: the line is no longer at its
                                   beginning.  */
   5377  1.1  mrg 	      bol = false;
   5378  1.1  mrg 	      pfile->mi_valid = false;
   5379  1.1  mrg 	      break;
   5380  1.1  mrg 	    }
   5381  1.1  mrg 	}
   5382  1.1  mrg 
                        /* End of buffer: pass on whatever text remains from BASE.  */
   5383  1.1  mrg       if (buffer->rlimit > base && !pfile->state.skipping)
   5384  1.1  mrg 	{
   5385  1.1  mrg 	  const unsigned char *limit = buffer->rlimit;
   5386  1.1  mrg 	  /* If the file was not newline terminated, add rlimit, which is
   5387  1.1  mrg 	     guaranteed to point to a newline, to the end of our range.  */
   5388  1.1  mrg 	  if (limit[-1] != '\n')
   5389  1.1  mrg 	    {
   5390  1.1  mrg 	      limit++;
   5391  1.1  mrg 	      CPP_INCREMENT_LINE (pfile, 0);
   5392  1.1  mrg 	      line_count++;
   5393  1.1  mrg 	    }
   5394  1.1  mrg 	  cb (pfile, CPP_DO_print, data, line_count, base, limit - base);
   5395  1.1  mrg 	}
   5396  1.1  mrg 
   5397  1.1  mrg       _cpp_pop_buffer (pfile);
   5398  1.1  mrg     }
   5399  1.1  mrg   while (pfile->buffer);
   5400           }
   5401