dist/libcpp/lex.cc

1.1  mrg /* CPP Library - lexical analysis.
1.1  mrg    Copyright (C) 2000-2022 Free Software Foundation, Inc.
1.1  mrg    Contributed by Per Bothner, 1994-95.
1.1  mrg    Based on CCCP program by Paul Rubin, June 1986
1.1  mrg    Adapted to ANSI C, Richard Stallman, Jan 1987
1.1  mrg    Broken out to separate file, Zack Weinberg, Mar 2000
1.1  mrg
1.1  mrg This program is free software; you can redistribute it and/or modify it
1.1  mrg under the terms of the GNU General Public License as published by the
1.1  mrg Free Software Foundation; either version 3, or (at your option) any
1.1  mrg later version.
1.1  mrg
1.1  mrg This program is distributed in the hope that it will be useful,
1.1  mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
1.1  mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1.1  mrg GNU General Public License for more details.
1.1  mrg
1.1  mrg You should have received a copy of the GNU General Public License
1.1  mrg along with this program; see the file COPYING3.  If not see
1.1  mrg <http://www.gnu.org/licenses/>.  */
1.1  mrg
1.1  mrg #include "config.h"
1.1  mrg #include "system.h"
1.1  mrg #include "cpplib.h"
1.1  mrg #include "internal.h"
1.1  mrg
1.1  mrg enum spell_type  /* Spelling category stored for each token type in the
1.1  mrg 		    token_spellings table.  */
1.1  mrg {
1.1  mrg   SPELL_OPERATOR = 0,  /* Category given to every OP() table entry.  */
1.1  mrg   SPELL_IDENT,  /* TK() entries pick their category via SPELL_ ## s;
1.1  mrg 		   presumably one of this and the two following values
1.1  mrg 		   (TTYPE_TABLE is not visible here -- confirm).  */
1.1  mrg   SPELL_LITERAL,
1.1  mrg   SPELL_NONE
1.1  mrg };
1.1  mrg
1.1  mrg struct token_spelling  /* One row of the token_spellings table.  */
1.1  mrg {
1.1  mrg   enum spell_type category;  /* Spelling category of the token type.  */
1.1  mrg   const unsigned char *name;  /* For OP() rows the operator text S; for
1.1  mrg 				 TK() rows the stringified enumerator
1.1  mrg 				 name #e.  */
1.1  mrg };
1.1  mrg
1.1  mrg static const unsigned char *const digraph_spellings[] =
1.1  mrg { UC"%:", UC"%:%:", UC"<:", UC":>", UC"<%", UC"%>" };  /* Digraph source
1.1  mrg    spellings.  NOTE(review): the indexing scheme is decided by users of
1.1  mrg    this table outside this excerpt -- confirm before reordering.  */
1.1  mrg
1.1  mrg #define OP(e, s) { SPELL_OPERATOR, UC s  },  /* Operator row: name is its text S.  */
1.1  mrg #define TK(e, s) { SPELL_ ## s,    UC #e },  /* Other row: category S, name is #e.  */
1.1  mrg static const struct token_spelling token_spellings[N_TTYPES] = { TTYPE_TABLE };  /* One row per token type.  */
1.1  mrg #undef OP
1.1  mrg #undef TK
1.1  mrg
1.1  mrg #define TOKEN_SPELL(token) (token_spellings[(token)->type].category)  /* Category for TOKEN's type.  */
1.1  mrg #define TOKEN_NAME(token) (token_spellings[(token)->type].name)  /* Table name for TOKEN's type.  */
1.1  mrg
1.1  mrg static void add_line_note (cpp_buffer *, const uchar *, unsigned int);
1.1  mrg static int skip_line_comment (cpp_reader *);
1.1  mrg static void skip_whitespace (cpp_reader *, cppchar_t);
1.1  mrg static void lex_string (cpp_reader *, cpp_token *, const uchar *);
1.1  mrg static void save_comment (cpp_reader *, cpp_token *, const uchar *, cppchar_t);
1.1  mrg static void store_comment (cpp_reader *, cpp_token *);
1.1  mrg static void create_literal (cpp_reader *, cpp_token *, const uchar *,
1.1  mrg 			    unsigned int, enum cpp_ttype);
1.1  mrg static bool warn_in_comment (cpp_reader *, _cpp_line_note *);
1.1  mrg static int name_p (cpp_reader *, const cpp_string *);
1.1  mrg static tokenrun *next_tokenrun (tokenrun *);
1.1  mrg
1.1  mrg static _cpp_buff *new_buff (size_t);
1.1  mrg
1.1  mrg
1.1  mrg /* Utility routine:
1.1  mrg
1.1  mrg    Return 1 when TOKEN is a CPP_NAME whose identifier is spelled exactly
1.1  mrg    like the NUL-terminated string STRING, and 0 otherwise.  A TOKEN of
1.1  mrg    any other type always yields 0.  */
1.1  mrg int
1.1  mrg cpp_ideq (const cpp_token *token, const char *string)
1.1  mrg {
1.1  mrg   if (token->type == CPP_NAME)
1.1  mrg     {
1.1  mrg       const uchar *wanted = (const uchar *) string;
1.1  mrg
1.1  mrg       return ustrcmp (NODE_NAME (token->val.node.node), wanted) == 0;
1.1  mrg     }
1.1  mrg
1.1  mrg   return 0;
1.1  mrg }
1.1  mrg
1.1  mrg /* Append a note of kind TYPE, located at byte POS of the current cleaned
1.1  mrg    logical line, to BUFFER's note array.  The array is enlarged first
1.1  mrg    when every allocated slot is already in use.  */
1.1  mrg static void
1.1  mrg add_line_note (cpp_buffer *buffer, const uchar *pos, unsigned int type)
1.1  mrg {
1.1  mrg   _cpp_line_note *slot;
1.1  mrg
1.1  mrg   if (buffer->notes_used == buffer->notes_cap)
1.1  mrg     {
1.1  mrg       /* Double the capacity; the constant term gives an empty array a
1.1  mrg          useful first size.  */
1.1  mrg       buffer->notes_cap = 2 * buffer->notes_cap + 200;
1.1  mrg       buffer->notes = XRESIZEVEC (_cpp_line_note, buffer->notes,
1.1  mrg                                   buffer->notes_cap);
1.1  mrg     }
1.1  mrg
1.1  mrg   slot = &buffer->notes[buffer->notes_used++];
1.1  mrg   slot->pos = pos;
1.1  mrg   slot->type = type;
1.1  mrg }
1.1  mrg
1.1  mrg
1.1  mrg /* Fast path to find line special characters using optimized character
1.1  mrg    scanning algorithms.  Anything complicated falls back to the slow
1.1  mrg    path below.  Since this loop is very hot it's worth doing these kinds
1.1  mrg    of optimizations.
1.1  mrg
1.1  mrg    One of the paths through the ifdefs should provide
1.1  mrg
1.1  mrg      const uchar *search_line_fast (const uchar *s, const uchar *end);
1.1  mrg
1.1  mrg    Between S and END, search for \n, \r, \\, ?.  Return a pointer to
1.1  mrg    the found character.
1.1  mrg
1.1  mrg    Note that the last character of the buffer is *always* a newline,
1.1  mrg    as forced by _cpp_convert_input.  This fact can be used to avoid
1.1  mrg    explicitly looking for the end of the buffer.  */
1.1  mrg
1.1  mrg /* Configure gives us an ifdef test.  Default the macro to 0 when it is
1.1  mrg    not defined, so that it can also be tested as an ordinary expression
1.1  mrg    (e.g. `if (WORDS_BIGENDIAN)') by the helpers below.  */
1.1  mrg #ifndef WORDS_BIGENDIAN
1.1  mrg #define WORDS_BIGENDIAN 0
1.1  mrg #endif
1.1  mrg
1.1  mrg /* We'd like the largest integer that fits into a register.  There's nothing
1.1  mrg    in <stdint.h> that gives us that.  For most hosts this is unsigned long,
1.1  mrg    but MS decided on an LLP64 model.  Thankfully when building with GCC we
1.1  mrg    can get the "real" word size.  Other compilers fall back to unsigned
1.1  mrg    long.  WORD_TYPE is the unit the acc_char_* helpers below work on.  */
1.1  mrg #ifdef __GNUC__
1.1  mrg typedef unsigned int word_type __attribute__((__mode__(__word__)));
1.1  mrg #else
1.1  mrg typedef unsigned long word_type;
1.1  mrg #endif
1.1  mrg
1.1  mrg /* The code below is only expecting sizes 4 or 8.
1.1  mrg    Die at compile-time if this expectation is violated: the array bound
1.1  mrg    below evaluates to 1 when the size is acceptable and to -1, which is
1.1  mrg    not a valid array bound, otherwise.  */
1.1  mrg typedef char check_word_type_size
1.1  mrg   [(sizeof(word_type) == 8 || sizeof(word_type) == 4) * 2 - 1];
1.1  mrg
1.1  mrg /* Return VAL with its first N bytes (in memory order) cleared to zero,
1.1  mrg    so that they cannot match one of the interesting characters.  Note
1.1  mrg    that NUL is not interesting.  */
1.1  mrg
1.1  mrg static inline word_type
1.1  mrg acc_char_mask_misalign (word_type val, unsigned int n)
1.1  mrg {
1.1  mrg   const unsigned int nbits = n * 8;
1.1  mrg   word_type keep = -1;
1.1  mrg
1.1  mrg   /* The first bytes in memory are the most significant ones on a
1.1  mrg      big-endian host and the least significant ones otherwise.  */
1.1  mrg   keep = WORDS_BIGENDIAN ? keep >> nbits : keep << nbits;
1.1  mrg
1.1  mrg   return val & keep;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return X replicated to all byte positions within WORD_TYPE.  */
1.1  mrg
1.1  mrg static inline word_type
1.1  mrg acc_char_replicate (uchar x)
1.1  mrg {
1.1  mrg   /* Do all of the arithmetic in WORD_TYPE.  Shifting X directly would
1.1  mrg      operate on a promoted (signed) int: for X >= 0x80 that shifts into
1.1  mrg      the sign bit, and the negative result would then be sign-extended
1.1  mrg      when widened to a 64-bit word, corrupting the upper half.  */
1.1  mrg   word_type ret = x;
1.1  mrg
1.1  mrg   ret |= ret << 8;
1.1  mrg   ret |= ret << 16;
1.1  mrg   /* Two 16-bit shifts avoid a shift count equal to the type width when
1.1  mrg      WORD_TYPE is only 32 bits wide and this branch is dead.  */
1.1  mrg   if (sizeof(word_type) == 8)
1.1  mrg     ret |= ret << 16 << 16;
1.1  mrg   return ret;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return non-zero if some byte of VAL is (probably) equal to the byte
1.1  mrg    at the same position in C.  Outside of Alpha the answer may be a
1.1  mrg    false positive, which the caller has to weed out.  */
1.1  mrg
1.1  mrg static inline word_type
1.1  mrg acc_char_cmp (word_type val, word_type c)
1.1  mrg {
1.1  mrg #if defined(__GNUC__) && defined(__alpha__)
1.1  mrg   /* We can get exact results using a compare-bytes instruction.
1.1  mrg      Get (val == c) via (0 >= (val ^ c)).  */
1.1  mrg   const word_type diff = val ^ c;
1.1  mrg
1.1  mrg   return __builtin_alpha_cmpbge (0, diff);
1.1  mrg #else
1.1  mrg   /* 0x7efefeff, extended to 0x7efefefefefefeff for 64-bit words.  The
1.1  mrg      double 16-bit shift keeps the expression valid when WORD_TYPE is
1.1  mrg      only 32 bits wide.  */
1.1  mrg   const word_type magic
1.1  mrg     = (sizeof(word_type) == 8
1.1  mrg        ? (((word_type) 0x7efefefeU << 16 << 16) | 0xfefefeffU)
1.1  mrg        : (word_type) 0x7efefeffU);
1.1  mrg   const word_type diff = val ^ c;
1.1  mrg
1.1  mrg   return ((diff + magic) ^ ~diff) & ~magic;
1.1  mrg #endif
1.1  mrg }
1.1  mrg
1.1  mrg /* Given that the result CMP of acc_char_cmp is non-zero, return the
1.1  mrg    index (in memory order) of the first interesting character within
1.1  mrg    VAL.  If CMP was a false positive, return -1.  */
1.1  mrg
1.1  mrg static inline int
1.1  mrg acc_char_index (word_type cmp ATTRIBUTE_UNUSED,
1.1  mrg                 word_type val ATTRIBUTE_UNUSED)
1.1  mrg {
1.1  mrg #if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN
1.1  mrg   /* The cmpbge instruction sets *bits* of the result corresponding to
1.1  mrg      matches in the bytes with no false positives.  */
1.1  mrg   return __builtin_ctzl (cmp);
1.1  mrg #else
1.1  mrg   unsigned int byte_no = 0;
1.1  mrg
1.1  mrg   /* ??? It would be nice to force unrolling here,
1.1  mrg      and have all of these constants folded.  */
1.1  mrg   while (byte_no < sizeof(word_type))
1.1  mrg     {
1.1  mrg       /* Byte BYTE_NO in memory order sits at the top of the word on a
1.1  mrg          big-endian host and at the bottom otherwise.  */
1.1  mrg       const unsigned int shift
1.1  mrg         = (WORDS_BIGENDIAN ? sizeof(word_type) - byte_no - 1 : byte_no) * 8;
1.1  mrg       const uchar c = (val >> shift) & 0xff;
1.1  mrg
1.1  mrg       switch (c)
1.1  mrg         {
1.1  mrg         case '\n':
1.1  mrg         case '\r':
1.1  mrg         case '\\':
1.1  mrg         case '?':
1.1  mrg           return byte_no;
1.1  mrg         default:
1.1  mrg           break;
1.1  mrg         }
1.1  mrg
1.1  mrg       ++byte_no;
1.1  mrg     }
1.1  mrg
1.1  mrg   return -1;
1.1  mrg #endif
1.1  mrg }
1.1  mrg
1.1  mrg /* A version of the fast scanner using bit fiddling techniques.
1.1  mrg
1.1  mrg    For 32-bit words, one would normally perform 16 comparisons and
1.1  mrg    16 branches.  With this algorithm one performs 24 arithmetic
1.1  mrg    operations and one branch.  Whether this is faster with a 32-bit
1.1  mrg    word size is going to be somewhat system dependent.
1.1  mrg
1.1  mrg    For 64-bit words, we eliminate twice the number of comparisons
1.1  mrg    and branches without increasing the number of arithmetic operations.
1.1  mrg    It's almost certainly going to be a win with 64-bit word size.
1.1  mrg
1.1  mrg    Returns a pointer to the first '\n', '\r', '\\' or '?' at or after S.
1.1  mrg    END is unused; the loop only stops once a match has been found.  */
1.1  mrg
1.1  mrg static const uchar * search_line_acc_char (const uchar *, const uchar *)
1.1  mrg   ATTRIBUTE_UNUSED;
1.1  mrg
1.1  mrg static const uchar *
1.1  mrg search_line_acc_char (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
1.1  mrg {
1.1  mrg   const word_type nl_word = acc_char_replicate ('\n');
1.1  mrg   const word_type cr_word = acc_char_replicate ('\r');
1.1  mrg   const word_type bs_word = acc_char_replicate ('\\');
1.1  mrg   const word_type qm_word = acc_char_replicate ('?');
1.1  mrg
1.1  mrg   /* Round S down to a word boundary and remember how many leading bytes
1.1  mrg      of that first word lie before S.  */
1.1  mrg   const uintptr_t addr = (uintptr_t)s;
1.1  mrg   const unsigned int skip = addr & (sizeof(word_type) - 1);
1.1  mrg   const word_type *wp = (word_type *)(addr & -sizeof(word_type));
1.1  mrg   word_type cur = *wp;
1.1  mrg
1.1  mrg   /* Mask out any bytes from before the beginning.  */
1.1  mrg   if (skip)
1.1  mrg     cur = acc_char_mask_misalign (cur, skip);
1.1  mrg
1.1  mrg   /* Main loop: one word per iteration.  */
1.1  mrg   for (;; cur = *++wp)
1.1  mrg     {
1.1  mrg       word_type hits = acc_char_cmp (cur, nl_word);
1.1  mrg
1.1  mrg       hits |= acc_char_cmp (cur, cr_word);
1.1  mrg       hits |= acc_char_cmp (cur, bs_word);
1.1  mrg       hits |= acc_char_cmp (cur, qm_word);
1.1  mrg
1.1  mrg       if (__builtin_expect (hits != 0, 0))
1.1  mrg         {
1.1  mrg           /* A negative index means the hit was a false positive.  */
1.1  mrg           const int idx = acc_char_index (hits, cur);
1.1  mrg
1.1  mrg           if (idx >= 0)
1.1  mrg             return (const uchar *)wp + idx;
1.1  mrg         }
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Disable on Solaris 2/x86 until the following problem can be properly
1.1  mrg    autoconfed:
1.1  mrg
1.1  mrg    The Solaris 10+ assembler tags objects with the instruction set
1.1  mrg    extensions used, so SSE4.2 executables cannot run on machines that
1.1  mrg    don't support that extension.  */
1.1  mrg
1.1  mrg #if (GCC_VERSION >= 4005) && (__GNUC__ >= 5 || !defined(__PIC__)) && (defined(__i386__) || defined(__x86_64__)) && !(defined(__sun__) && defined(__svr4__))
1.1  mrg
1.1  mrg /* Replicated character data to be shared between implementations.
1.1  mrg    Recall that outside of a context with vector support we can't
1.1  mrg    define compatible vector types, therefore these are all defined
1.1  mrg    in terms of raw characters.
1.1  mrg
1.1  mrg    Row 0 holds '\n', row 1 '\r', row 2 '\\' and row 3 '?'.  Each row is
1.1  mrg    16 bytes long and the array is 16-byte aligned; the MMX and SSE2
1.1  mrg    scanners below read a row as an 8-byte or 16-byte vector.  */
1.1  mrg static const char repl_chars[4][16] __attribute__((aligned(16))) = {
1.1  mrg   { '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n' },
1.1  mrg   { '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r' },
1.1  mrg   { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' },
1.1  mrg   { '?', '?', '?', '?', '?', '?', '?', '?',
1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?' },
1.1  mrg };
1.1  mrg
1.1  mrg /* A version of the fast scanner using MMX vectorized byte compare insns.
1.1  mrg
1.1  mrg    This uses the PMOVMSKB instruction which was introduced with "MMX2",
1.1  mrg    which was packaged into SSE1; it is also present in the AMD MMX
1.1  mrg    extension.  Mark the function as using "sse" so that we emit a real
1.1  mrg    "emms" instruction, rather than the 3dNOW "femms" instruction.
1.1  mrg
1.1  mrg    Returns a pointer to the first '\n', '\r', '\\' or '?' at or after S.
1.1  mrg    END is unused; the loop only terminates once a match is found.  */
1.1  mrg
1.1  mrg static const uchar *
1.1  mrg #ifndef __SSE__
1.1  mrg __attribute__((__target__("sse")))
1.1  mrg #endif
1.1  mrg search_line_mmx (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
1.1  mrg {
1.1  mrg   typedef char v8qi __attribute__ ((__vector_size__ (8)));
1.1  mrg   typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
1.1  mrg
1.1  mrg   const v8qi repl_nl = *(const v8qi *)repl_chars[0];
1.1  mrg   const v8qi repl_cr = *(const v8qi *)repl_chars[1];
1.1  mrg   const v8qi repl_bs = *(const v8qi *)repl_chars[2];
1.1  mrg   const v8qi repl_qm = *(const v8qi *)repl_chars[3];
1.1  mrg
1.1  mrg   unsigned int misalign, found, mask;
1.1  mrg   const v8qi *p;
1.1  mrg   v8qi data, t, c;
1.1  mrg
1.1  mrg   /* Align the source pointer.  While MMX doesn't generate unaligned data
1.1  mrg      faults, this allows us to safely scan to the end of the buffer without
1.1  mrg      reading beyond the end of the last page.  */
1.1  mrg   misalign = (uintptr_t)s & 7;
1.1  mrg   p = (const v8qi *)((uintptr_t)s & -8);
1.1  mrg   data = *p;
1.1  mrg
1.1  mrg   /* Create a mask for the bytes that are valid within the first
1.1  mrg      8-byte block: the low MISALIGN bits, which stand for the bytes
1.1  mrg      in front of S, are cleared.  The Idea here is that the AND with
1.1  mrg      the mask within the loop is "free", since we need some AND or TEST
1.1  mrg      insn in order to set the flags for the branch anyway.  */
1.1  mrg   mask = -1u << misalign;
1.1  mrg
1.1  mrg   /* Main loop processing 8 bytes at a time.  Enter it in the middle:
1.1  mrg      the first block is already loaded and must be tested with the
1.1  mrg      misalignment mask, whereas every later iteration loads the next
1.1  mrg      block and uses an all-ones mask.  */
1.1  mrg   goto start;
1.1  mrg   do
1.1  mrg     {
1.1  mrg       data = *++p;
1.1  mrg       mask = -1;
1.1  mrg
1.1  mrg     start:
1.1  mrg       t = __builtin_ia32_pcmpeqb(data, repl_nl);
1.1  mrg       c = __builtin_ia32_pcmpeqb(data, repl_cr);
1.1  mrg       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
1.1  mrg       c = __builtin_ia32_pcmpeqb(data, repl_bs);
1.1  mrg       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
1.1  mrg       c = __builtin_ia32_pcmpeqb(data, repl_qm);
1.1  mrg       t = (v8qi) __builtin_ia32_por ((__m64)t, (__m64)c);
1.1  mrg       found = __builtin_ia32_pmovmskb (t);
1.1  mrg       found &= mask;
1.1  mrg     }
1.1  mrg   while (!found);
1.1  mrg
1.1  mrg   __builtin_ia32_emms ();
1.1  mrg
1.1  mrg   /* FOUND contains 1 in bits for which we matched a relevant
1.1  mrg      character.  Conversion to the byte index is trivial: the number
1.1  mrg      of the lowest set bit is the offset of the first match from P.  */
1.1  mrg   found = __builtin_ctz(found);
1.1  mrg   return (const uchar *)p + found;
1.1  mrg }
1.1  mrg
1.1  mrg /* A version of the fast scanner using SSE2 vectorized byte compare insns.
1.1  mrg
1.1  mrg    Returns a pointer to the first '\n', '\r', '\\' or '?' at or after S.
1.1  mrg    END is unused; the loop only terminates once a match is found.  */
1.1  mrg
1.1  mrg static const uchar *
1.1  mrg #ifndef __SSE2__
1.1  mrg __attribute__((__target__("sse2")))
1.1  mrg #endif
1.1  mrg search_line_sse2 (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
1.1  mrg {
1.1  mrg   typedef char v16qi __attribute__ ((__vector_size__ (16)));
1.1  mrg
1.1  mrg   const v16qi repl_nl = *(const v16qi *)repl_chars[0];
1.1  mrg   const v16qi repl_cr = *(const v16qi *)repl_chars[1];
1.1  mrg   const v16qi repl_bs = *(const v16qi *)repl_chars[2];
1.1  mrg   const v16qi repl_qm = *(const v16qi *)repl_chars[3];
1.1  mrg
1.1  mrg   unsigned int misalign, found, mask;
1.1  mrg   const v16qi *p;
1.1  mrg   v16qi data, t;
1.1  mrg
1.1  mrg   /* Align the source pointer: P is S rounded down to a 16-byte boundary
1.1  mrg      and MISALIGN is the number of bytes between P and S.  */
1.1  mrg   misalign = (uintptr_t)s & 15;
1.1  mrg   p = (const v16qi *)((uintptr_t)s & -16);
1.1  mrg   data = *p;
1.1  mrg
1.1  mrg   /* Create a mask for the bytes that are valid within the first
1.1  mrg      16-byte block: the low MISALIGN bits, which stand for the bytes
1.1  mrg      in front of S, are cleared.  The Idea here is that the AND with
1.1  mrg      the mask within the loop is "free", since we need some AND or TEST
1.1  mrg      insn in order to set the flags for the branch anyway.  */
1.1  mrg   mask = -1u << misalign;
1.1  mrg
1.1  mrg   /* Main loop processing 16 bytes at a time.  Enter it in the middle:
1.1  mrg      the first block is already loaded and must be tested with the
1.1  mrg      misalignment mask, whereas every later iteration loads the next
1.1  mrg      block and uses an all-ones mask.  */
1.1  mrg   goto start;
1.1  mrg   do
1.1  mrg     {
1.1  mrg       data = *++p;
1.1  mrg       mask = -1;
1.1  mrg
1.1  mrg     start:
1.1  mrg       t  = data == repl_nl;
1.1  mrg       t |= data == repl_cr;
1.1  mrg       t |= data == repl_bs;
1.1  mrg       t |= data == repl_qm;
1.1  mrg       found = __builtin_ia32_pmovmskb128 (t);
1.1  mrg       found &= mask;
1.1  mrg     }
1.1  mrg   while (!found);
1.1  mrg
1.1  mrg   /* FOUND contains 1 in bits for which we matched a relevant
1.1  mrg      character.  Conversion to the byte index is trivial: the number
1.1  mrg      of the lowest set bit is the offset of the first match from P.  */
1.1  mrg   found = __builtin_ctz(found);
1.1  mrg   return (const uchar *)p + found;
1.1  mrg }
1.1  mrg
1.1  mrg #ifdef HAVE_SSE4
1.1  mrg /* A version of the fast scanner using SSE 4.2 vectorized string insns.
1.1  mrg
1.1  mrg    Returns a pointer to the first '\n', '\r', '?' or '\\' at or after S.
1.1  mrg    END is only consulted to decide whether an unaligned 16-byte read at
1.1  mrg    S is safe; the main loop itself runs until a match is found.  */
1.1  mrg
1.1  mrg static const uchar *
1.1  mrg #ifndef __SSE4_2__
1.1  mrg __attribute__((__target__("sse4.2")))
1.1  mrg #endif
1.1  mrg search_line_sse42 (const uchar *s, const uchar *end)
1.1  mrg {
1.1  mrg   typedef char v16qi __attribute__ ((__vector_size__ (16)));
1.1  mrg   static const v16qi search = { '\n', '\r', '?', '\\' };
1.1  mrg
1.1  mrg   uintptr_t si = (uintptr_t)s;
1.1  mrg   uintptr_t index;
1.1  mrg
1.1  mrg   /* Check for unaligned input.  */
1.1  mrg   if (si & 15)
1.1  mrg     {
1.1  mrg       v16qi sv;
1.1  mrg
1.1  mrg       if (__builtin_expect (end - s < 16, 0)
1.1  mrg 	  && __builtin_expect ((si & 0xfff) > 0xff0, 0))
1.1  mrg 	{
1.1  mrg 	  /* There are less than 16 bytes left in the buffer, and less
1.1  mrg 	     than 16 bytes left on the page.  Reading 16 bytes at this
1.1  mrg 	     point might generate a spurious page fault.  Defer to the
1.1  mrg 	     SSE2 implementation, which already handles alignment.  */
1.1  mrg 	  return search_line_sse2 (s, end);
1.1  mrg 	}
1.1  mrg
1.1  mrg       /* ??? The builtin doesn't understand that the PCMPESTRI read from
1.1  mrg 	 memory need not be aligned.  So load the 16 bytes explicitly and
1.1  mrg 	 compare them against the 4 characters of SEARCH; an INDEX below
1.1  mrg 	 16 is the offset of a match from S.  */
1.1  mrg       sv = __builtin_ia32_loaddqu ((const char *) s);
1.1  mrg       index = __builtin_ia32_pcmpestri128 (search, 4, sv, 16, 0);
1.1  mrg
1.1  mrg       if (__builtin_expect (index < 16, 0))
1.1  mrg 	goto found;
1.1  mrg
1.1  mrg       /* Advance the pointer to an aligned address.  We will re-scan a
1.1  mrg 	 few bytes, but we no longer need care for reading past the
1.1  mrg 	 end of a page, since we're guaranteed a match.  */
1.1  mrg       s = (const uchar *)((si + 15) & -16);
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Main loop, processing 16 bytes at a time.  */
1.1  mrg #ifdef __GCC_ASM_FLAG_OUTPUTS__
1.1  mrg   while (1)
1.1  mrg     {
1.1  mrg       char f;
1.1  mrg
1.1  mrg       /* By using inline assembly instead of the builtin,
1.1  mrg 	 we can use the result, as well as the flags set.  F receives
1.1  mrg 	 the carry flag, which is what signals a match here.  */
1.1  mrg       __asm ("%vpcmpestri\t$0, %2, %3"
1.1  mrg 	     : "=c"(index), "=@ccc"(f)
1.1  mrg 	     : "m"(*s), "x"(search), "a"(4), "d"(16));
1.1  mrg       if (f)
1.1  mrg 	break;
1.1  mrg
1.1  mrg       s += 16;
1.1  mrg     }
1.1  mrg #else
1.1  mrg   s -= 16;
1.1  mrg   /* By doing the whole loop in inline assembly,
1.1  mrg      we can make proper use of the flags set.  S was moved back by 16
1.1  mrg      above because the loop advances it before every comparison; the
1.1  mrg      loop repeats for as long as the carry flag stays clear.  */
1.1  mrg   __asm (      ".balign 16\n"
1.1  mrg 	"0:	add $16, %1\n"
1.1  mrg 	"	%vpcmpestri\t$0, (%1), %2\n"
1.1  mrg 	"	jnc 0b"
1.1  mrg 	: "=&c"(index), "+r"(s)
1.1  mrg 	: "x"(search), "a"(4), "d"(16));
1.1  mrg #endif
1.1  mrg
1.1  mrg  found:
1.1  mrg   return s + index;
1.1  mrg }
1.1  mrg
1.1  mrg #else
1.1  mrg /* Work around out-dated assemblers without sse4 support: without
1.1  mrg    HAVE_SSE4 the SSE4.2 entry point is simply the SSE2 scanner.  */
1.1  mrg #define search_line_sse42 search_line_sse2
1.1  mrg #endif
1.1  mrg
1.1  mrg /* Check the CPU capabilities.  */
1.1  mrg
1.1  mrg #include "../gcc/config/i386/cpuid.h"
1.1  mrg
1.1  mrg typedef const uchar * (*search_line_fast_type) (const uchar *, const uchar *);
1.1  mrg static search_line_fast_type search_line_fast;
1.1  mrg
1.1  mrg #define HAVE_init_vectorized_lexer 1
1.1  mrg /* Choose the line scanner for the host CPU and store it in
1.1  mrg    SEARCH_LINE_FAST.  MINIMUM is the instruction-set level already
1.1  mrg    guaranteed by the compiler options (0 none, 1 SSE, 2 SSE2, 3 SSE4.2);
1.1  mrg    a higher level is only used when CPUID reports it.  Without any
1.1  mrg    usable vector support the generic word-at-a-time scanner is kept.  */
1.1  mrg static inline void
1.1  mrg init_vectorized_lexer (void)
1.1  mrg {
1.1  mrg   unsigned dummy, ecx = 0, edx = 0;
1.1  mrg   search_line_fast_type impl = search_line_acc_char;
1.1  mrg   int minimum = 0;
1.1  mrg
1.1  mrg #if defined(__SSE4_2__)
1.1  mrg   minimum = 3;
1.1  mrg #elif defined(__SSE2__)
1.1  mrg   minimum = 2;
1.1  mrg #elif defined(__SSE__)
1.1  mrg   minimum = 1;
1.1  mrg #endif
1.1  mrg
1.1  mrg   if (minimum == 3)
1.1  mrg     impl = search_line_sse42;
1.1  mrg   else if (__get_cpuid (1, &dummy, &dummy, &ecx, &edx) || minimum == 2)
1.1  mrg     {
1.1  mrg       /* MINIMUM is below 3 on this path, so SSE4.2 has to be confirmed
1.1  mrg          by CPUID.  ECX and EDX stay zero if the CPUID query failed.  */
1.1  mrg       if (ecx & bit_SSE4_2)
1.1  mrg         impl = search_line_sse42;
1.1  mrg       else if (minimum == 2 || (edx & bit_SSE2))
1.1  mrg         impl = search_line_sse2;
1.1  mrg       else if (minimum == 1 || (edx & bit_SSE))
1.1  mrg         impl = search_line_mmx;
1.1  mrg     }
1.1  mrg   else if (__get_cpuid (0x80000001, &dummy, &dummy, &dummy, &edx))
1.1  mrg     {
1.1  mrg       /* Basic CPUID leaf unavailable: consult the extended leaf for the
1.1  mrg          AMD MMX extension.  */
1.1  mrg       if (minimum == 1
1.1  mrg           || (edx & (bit_MMXEXT | bit_CMOV)) == (bit_MMXEXT | bit_CMOV))
1.1  mrg         impl = search_line_mmx;
1.1  mrg     }
1.1  mrg
1.1  mrg   search_line_fast = impl;
1.1  mrg }
1.1  mrg
1.1  mrg #elif (GCC_VERSION >= 4005) && defined(_ARCH_PWR8) && defined(__ALTIVEC__)
1.1  mrg
1.1  mrg /* A version of the fast scanner using AltiVec vectorized byte compares
1.1  mrg    and VSX unaligned loads (when VSX is available).  This is otherwise
1.1  mrg    the same as the AltiVec version.
1.1  mrg
1.1  mrg    Returns a pointer to the first '\n', '\r', '\\' or '?' at or after S.
1.1  mrg    END is unused; the loop only terminates once a match is found.  */
1.1  mrg
1.1  mrg ATTRIBUTE_NO_SANITIZE_UNDEFINED
1.1  mrg static const uchar *
1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
1.1  mrg {
1.1  mrg   typedef __attribute__((altivec(vector))) unsigned char vc;
1.1  mrg
1.1  mrg   const vc repl_nl = {
1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
1.1  mrg   };
1.1  mrg   const vc repl_cr = {
1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
1.1  mrg   };
1.1  mrg   const vc repl_bs = {
1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
1.1  mrg   };
1.1  mrg   const vc repl_qm = {
1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
1.1  mrg   };
1.1  mrg   const vc zero = { 0 };
1.1  mrg
1.1  mrg   vc data, t;
1.1  mrg
1.1  mrg   /* Main loop processing 16 bytes at a time.  The load is issued
1.1  mrg      directly from S, so no bytes in front of S are ever examined.  */
1.1  mrg   do
1.1  mrg     {
1.1  mrg       vc m_nl, m_cr, m_bs, m_qm;
1.1  mrg
1.1  mrg       data = __builtin_vec_vsx_ld (0, s);
1.1  mrg       s += 16;
1.1  mrg
1.1  mrg       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
1.1  mrg       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
1.1  mrg       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
1.1  mrg       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
1.1  mrg       t = (m_nl | m_cr) | (m_bs | m_qm);
1.1  mrg
1.1  mrg       /* T now contains 0xff in bytes for which we matched one of the relevant
1.1  mrg 	 characters.  We want to exit the loop if any byte in T is non-zero.
1.1  mrg 	 Below is the expansion of vec_any_ne(t, zero).  */
1.1  mrg     }
1.1  mrg   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
1.1  mrg
1.1  mrg   /* Restore s to point to the 16 bytes we just processed.  */
1.1  mrg   s -= 16;
1.1  mrg
1.1  mrg   {
1.1  mrg #define N  (sizeof(vc) / sizeof(long))
1.1  mrg
1.1  mrg     union {
1.1  mrg       vc v;
1.1  mrg       /* Statically assert that N is 2 or 4.  */
1.1  mrg       unsigned long l[(N == 2 || N == 4) ? N : -1];
1.1  mrg     } u;
1.1  mrg     unsigned long l, i = 0;
1.1  mrg
1.1  mrg     u.v = t;
1.1  mrg
1.1  mrg     /* Find the first word of T that is non-zero.  S is advanced past
1.1  mrg        every all-zero word that is skipped, so that it ends up pointing
1.1  mrg        at the bytes covered by L.  */
1.1  mrg     switch (N)
1.1  mrg       {
1.1  mrg       case 4:
1.1  mrg 	l = u.l[i++];
1.1  mrg 	if (l != 0)
1.1  mrg 	  break;
1.1  mrg 	s += sizeof(unsigned long);
1.1  mrg 	l = u.l[i++];
1.1  mrg 	if (l != 0)
1.1  mrg 	  break;
1.1  mrg 	s += sizeof(unsigned long);
1.1  mrg 	/* FALLTHRU */
1.1  mrg       case 2:
1.1  mrg 	l = u.l[i++];
1.1  mrg 	if (l != 0)
1.1  mrg 	  break;
1.1  mrg 	s += sizeof(unsigned long);
1.1  mrg 	l = u.l[i];
1.1  mrg       }
1.1  mrg
1.1  mrg     /* L now contains 0xff in bytes for which we matched one of the
1.1  mrg        relevant characters.  We can find the byte index by finding
1.1  mrg        its bit index and dividing by 8.  The first byte in memory is
1.1  mrg        the most significant one on big-endian hosts (count leading
1.1  mrg        zeros) and the least significant one otherwise (count trailing
1.1  mrg        zeros).  */
1.1  mrg #ifdef __BIG_ENDIAN__
1.1  mrg     l = __builtin_clzl(l) >> 3;
1.1  mrg #else
1.1  mrg     l = __builtin_ctzl(l) >> 3;
1.1  mrg #endif
1.1  mrg     return s + l;
1.1  mrg
1.1  mrg #undef N
1.1  mrg   }
1.1  mrg }
1.1  mrg
1.1  mrg #elif (GCC_VERSION >= 4005) && defined(__ALTIVEC__) && defined (__BIG_ENDIAN__)
1.1  mrg
1.1  mrg /* A version of the fast scanner using AltiVec vectorized byte compares.
1.1  mrg    This cannot be used for little endian because vec_lvsl/lvsr are
1.1  mrg    deprecated for little endian and the code won't work properly.
1.1  mrg
1.1  mrg    Returns a pointer to the first '\n', '\r', '\\' or '?' at or after S.
1.1  mrg    END is unused; the loop only terminates once a match is found.  */
1.1  mrg /* ??? Unfortunately, attribute(target("altivec")) is not yet supported,
1.1  mrg    so we can't compile this function without -maltivec on the command line
1.1  mrg    (or implied by some other switch).  */
1.1  mrg
1.1  mrg static const uchar *
1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
1.1  mrg {
1.1  mrg   typedef __attribute__((altivec(vector))) unsigned char vc;
1.1  mrg
1.1  mrg   const vc repl_nl = {
1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n',
1.1  mrg     '\n', '\n', '\n', '\n', '\n', '\n', '\n', '\n'
1.1  mrg   };
1.1  mrg   const vc repl_cr = {
1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r',
1.1  mrg     '\r', '\r', '\r', '\r', '\r', '\r', '\r', '\r'
1.1  mrg   };
1.1  mrg   const vc repl_bs = {
1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\',
1.1  mrg     '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\'
1.1  mrg   };
1.1  mrg   const vc repl_qm = {
1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
1.1  mrg     '?', '?', '?', '?', '?', '?', '?', '?',
1.1  mrg   };
1.1  mrg   const vc ones = {
1.1  mrg     -1, -1, -1, -1, -1, -1, -1, -1,
1.1  mrg     -1, -1, -1, -1, -1, -1, -1, -1,
1.1  mrg   };
1.1  mrg   const vc zero = { 0 };
1.1  mrg
1.1  mrg   vc data, mask, t;
1.1  mrg
1.1  mrg   /* Altivec loads automatically mask addresses with -16.  This lets us
1.1  mrg      issue the first load as early as possible.  */
1.1  mrg   data = __builtin_vec_ld(0, (const vc *)s);
1.1  mrg
1.1  mrg   /* Discard bytes before the beginning of the buffer.  Do this by
1.1  mrg      beginning with all ones and shifting in zeros according to the
1.1  mrg      mis-alignment.  The LVSR instruction pulls the exact shift we
1.1  mrg      want from the address.  The zeroed bytes cannot match, since NUL
1.1  mrg      is not one of the characters searched for.  */
1.1  mrg   mask = __builtin_vec_lvsr(0, s);
1.1  mrg   mask = __builtin_vec_perm(zero, ones, mask);
1.1  mrg   data &= mask;
1.1  mrg
1.1  mrg   /* While altivec loads mask addresses, we still need to align S so
1.1  mrg      that the offset we compute at the end is correct.  */
1.1  mrg   s = (const uchar *)((uintptr_t)s & -16);
1.1  mrg
1.1  mrg   /* Main loop processing 16 bytes at a time.  Enter it in the middle:
1.1  mrg      the first block has already been loaded and masked above.  */
1.1  mrg   goto start;
1.1  mrg   do
1.1  mrg     {
1.1  mrg       vc m_nl, m_cr, m_bs, m_qm;
1.1  mrg
1.1  mrg       s += 16;
1.1  mrg       data = __builtin_vec_ld(0, (const vc *)s);
1.1  mrg
1.1  mrg     start:
1.1  mrg       m_nl = (vc) __builtin_vec_cmpeq(data, repl_nl);
1.1  mrg       m_cr = (vc) __builtin_vec_cmpeq(data, repl_cr);
1.1  mrg       m_bs = (vc) __builtin_vec_cmpeq(data, repl_bs);
1.1  mrg       m_qm = (vc) __builtin_vec_cmpeq(data, repl_qm);
1.1  mrg       t = (m_nl | m_cr) | (m_bs | m_qm);
1.1  mrg
1.1  mrg       /* T now contains 0xff in bytes for which we matched one of the relevant
1.1  mrg 	 characters.  We want to exit the loop if any byte in T is non-zero.
1.1  mrg 	 Below is the expansion of vec_any_ne(t, zero).  */
1.1  mrg     }
1.1  mrg   while (!__builtin_vec_vcmpeq_p(/*__CR6_LT_REV*/3, t, zero));
1.1  mrg
1.1  mrg   {
1.1  mrg #define N  (sizeof(vc) / sizeof(long))
1.1  mrg
1.1  mrg     union {
1.1  mrg       vc v;
1.1  mrg       /* Statically assert that N is 2 or 4.  */
1.1  mrg       unsigned long l[(N == 2 || N == 4) ? N : -1];
1.1  mrg     } u;
1.1  mrg     unsigned long l, i = 0;
1.1  mrg
1.1  mrg     u.v = t;
1.1  mrg
1.1  mrg     /* Find the first word of T that is non-zero.  S is advanced past
1.1  mrg        every all-zero word that is skipped, so that it ends up pointing
1.1  mrg        at the bytes covered by L.  */
1.1  mrg     switch (N)
1.1  mrg       {
1.1  mrg       case 4:
1.1  mrg 	l = u.l[i++];
1.1  mrg 	if (l != 0)
1.1  mrg 	  break;
1.1  mrg 	s += sizeof(unsigned long);
1.1  mrg 	l = u.l[i++];
1.1  mrg 	if (l != 0)
1.1  mrg 	  break;
1.1  mrg 	s += sizeof(unsigned long);
1.1  mrg 	/* FALLTHROUGH */
1.1  mrg       case 2:
1.1  mrg 	l = u.l[i++];
1.1  mrg 	if (l != 0)
1.1  mrg 	  break;
1.1  mrg 	s += sizeof(unsigned long);
1.1  mrg 	l = u.l[i];
1.1  mrg       }
1.1  mrg
1.1  mrg     /* L now contains 0xff in bytes for which we matched one of the
1.1  mrg        relevant characters.  We can find the byte index by finding
1.1  mrg        its bit index and dividing by 8.  This variant is big-endian
1.1  mrg        only, so the first byte in memory is the most significant one
1.1  mrg        and leading zeros are counted.  */
1.1  mrg     l = __builtin_clzl(l) >> 3;
1.1  mrg     return s + l;
1.1  mrg
1.1  mrg #undef N
1.1  mrg   }
1.1  mrg }
1.1  mrg
1.1  mrg #elif defined (__ARM_NEON) && defined (__ARM_64BIT_STATE)
1.1  mrg #include "arm_neon.h"
1.1  mrg
1.1  mrg /* This doesn't have to be the exact page size, but no system may use
1.1  mrg    a size smaller than this.  ARMv8 requires a minimum page size of
1.1  mrg    4k.  The impact of being conservative here is a small number of
1.1  mrg    cases will take the slightly slower entry path into the main
1.1  mrg    loop.  */
1.1  mrg
1.1  mrg #define AARCH64_MIN_PAGE_SIZE 4096
1.1  mrg
1.1  mrg static const uchar *
1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
1.1  mrg {  /* AArch64 Advanced SIMD scanner: returns a pointer to the first
1.1  mrg       '\n', '\r', '\\' or '?' at or after S.  END is unused; the main
1.1  mrg       loop only terminates once a match is found.  */
1.1  mrg   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
1.1  mrg   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
1.1  mrg   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
1.1  mrg   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
1.1  mrg   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
1.1  mrg
1.1  mrg #ifdef __ARM_BIG_ENDIAN
1.1  mrg   const int16x8_t shift = {8, 8, 8, 8, 0, 0, 0, 0};
1.1  mrg #else
1.1  mrg   const int16x8_t shift = {0, 0, 0, 0, 8, 8, 8, 8};
1.1  mrg #endif
1.1  mrg
1.1  mrg   unsigned int found;
1.1  mrg   const uint8_t *p;
1.1  mrg   uint8x16_t data;
1.1  mrg   uint8x16_t t;
1.1  mrg   uint16x8_t m;
1.1  mrg   uint8x16_t u, v, w;
1.1  mrg
1.1  mrg   /* Align the source pointer.  P is S rounded down to a 16-byte
1.1  mrg      boundary; S itself is left untouched.  */
1.1  mrg   p = (const uint8_t *)((uintptr_t)s & -16);
1.1  mrg
1.1  mrg   /* Assuming random string start positions, with a 4k page size we'll take
1.1  mrg      the slow path about 0.37% of the time.  */
1.1  mrg   if (__builtin_expect ((AARCH64_MIN_PAGE_SIZE
1.1  mrg 			 - (((uintptr_t) s) & (AARCH64_MIN_PAGE_SIZE - 1)))
1.1  mrg 			< 16, 0))
1.1  mrg     {
1.1  mrg       /* Slow path: the string starts near a possible page boundary.
1.1  mrg 	 Read the aligned block at P instead of 16 bytes from S, and
1.1  mrg 	 use MASK to ignore matches in the bytes in front of S.  */
1.1  mrg       uint32_t misalign, mask;
1.1  mrg
1.1  mrg       misalign = (uintptr_t)s & 15;
1.1  mrg       mask = (-1u << misalign) & 0xffff;
1.1  mrg       data = vld1q_u8 (p);
1.1  mrg       t = vceqq_u8 (data, repl_nl);
1.1  mrg       u = vceqq_u8 (data, repl_cr);
1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
1.1  mrg       t = vorrq_u8 (v, w);
1.1  mrg       t = vandq_u8 (t, xmask);
1.1  mrg       m = vpaddlq_u8 (t);
1.1  mrg       m = vshlq_u16 (m, shift);
1.1  mrg       found = vaddvq_u16 (m);
1.1  mrg       found &= mask;
1.1  mrg       if (found)
1.1  mrg 	return (const uchar*)p + __builtin_ctz (found);
1.1  mrg     }
1.1  mrg   else
1.1  mrg     {
1.1  mrg       data = vld1q_u8 ((const uint8_t *) s);
1.1  mrg       t = vceqq_u8 (data, repl_nl);
1.1  mrg       u = vceqq_u8 (data, repl_cr);
1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
1.1  mrg       t = vorrq_u8 (v, w);
1.1  mrg       if (__builtin_expect (vpaddd_u64 ((uint64x2_t)t) != 0, 0))
1.1  mrg 	goto done;
1.1  mrg     }
1.1  mrg
1.1  mrg   do
1.1  mrg     {
1.1  mrg       p += 16;
1.1  mrg       data = vld1q_u8 (p);
1.1  mrg       t = vceqq_u8 (data, repl_nl);
1.1  mrg       u = vceqq_u8 (data, repl_cr);
1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
1.1  mrg       t = vorrq_u8 (v, w);
1.1  mrg     } while (!vpaddd_u64 ((uint64x2_t)t));
1.1  mrg
1.1  mrg done:
1.1  mrg   /* Now that we've found the terminating substring, work out precisely where
1.1  mrg      we need to stop.  The AND with XMASK, the pairwise add, the shift and
1.1  mrg      the across-vector add reduce T to a bit mask in FOUND whose lowest
1.1  mrg      set bit gives the byte offset of the first match.  That offset is
1.1  mrg      relative to S when the match came from the unaligned first load (P
1.1  mrg      is then still below S) and relative to P otherwise.  */
1.1  mrg   t = vandq_u8 (t, xmask);
1.1  mrg   m = vpaddlq_u8 (t);
1.1  mrg   m = vshlq_u16 (m, shift);
1.1  mrg   found = vaddvq_u16 (m);
1.1  mrg   return (((((uintptr_t) p) < (uintptr_t) s) ? s : (const uchar *)p)
1.1  mrg 	  + __builtin_ctz (found));
1.1  mrg }
1.1  mrg
1.1  mrg #elif defined (__ARM_NEON)
1.1  mrg #include "arm_neon.h"
1.1  mrg /* Arm NEON version of search_line_fast.  Return a pointer to the
1.1  mrg    first byte at or after S that is '\n', '\r', '\\' or '?'.  END is
1.1  mrg    unused: the loop below has no bound of its own and stops only on a
1.1  mrg    match.  NOTE(review): presumably the caller guarantees that such a
1.1  mrg    byte exists in the buffer -- confirm.  The first block is loaded
1.1  mrg    from S rounded down to a 16-byte boundary, so up to 15 bytes before
1.1  mrg    S are read; matches among them are discarded via MASK.  */
1.1  mrg static const uchar *
1.1  mrg search_line_fast (const uchar *s, const uchar *end ATTRIBUTE_UNUSED)
1.1  mrg {
1.1  mrg   const uint8x16_t repl_nl = vdupq_n_u8 ('\n');
1.1  mrg   const uint8x16_t repl_cr = vdupq_n_u8 ('\r');
1.1  mrg   const uint8x16_t repl_bs = vdupq_n_u8 ('\\');
1.1  mrg   const uint8x16_t repl_qm = vdupq_n_u8 ('?');
1.1  mrg   const uint8x16_t xmask = (uint8x16_t) vdupq_n_u64 (0x8040201008040201ULL);
1.1  mrg
1.1  mrg   unsigned int misalign, found, mask;
1.1  mrg   const uint8_t *p;
1.1  mrg   uint8x16_t data;
1.1  mrg
1.1  mrg   /* Align the source pointer.  P is S rounded down to a multiple of
1.1  mrg      16 and MISALIGN is the offset of S within that block.  */
1.1  mrg   misalign = (uintptr_t)s & 15;
1.1  mrg   p = (const uint8_t *)((uintptr_t)s & -16);
1.1  mrg   data = vld1q_u8 (p);
1.1  mrg
1.1  mrg   /* Create a mask for the bytes that are valid within the first
1.1  mrg      16-byte block.  The idea here is that the AND with the mask
1.1  mrg      within the loop is "free", since we need some AND or TEST
1.1  mrg      insn in order to set the flags for the branch anyway.  The low
1.1  mrg      MISALIGN bits, which stand for the bytes before S, are cleared.  */
1.1  mrg   mask = (-1u << misalign) & 0xffff;
1.1  mrg
1.1  mrg   /* Main loop, processing 16 bytes at a time.  Enter it at START so
1.1  mrg      that the first block uses the DATA and MASK set up above; every
1.1  mrg      later iteration loads the next block and uses a full mask.  */
1.1  mrg   goto start;
1.1  mrg
1.1  mrg   do
1.1  mrg     {
1.1  mrg       uint8x8_t l;
1.1  mrg       uint16x4_t m;
1.1  mrg       uint32x2_t n;
1.1  mrg       uint8x16_t t, u, v, w;
1.1  mrg
1.1  mrg       p += 16;
1.1  mrg       data = vld1q_u8 (p);
1.1  mrg       mask = 0xffff;
1.1  mrg
1.1  mrg     start:
1.1  mrg       t = vceqq_u8 (data, repl_nl);
1.1  mrg       u = vceqq_u8 (data, repl_cr);
1.1  mrg       v = vorrq_u8 (t, vceqq_u8 (data, repl_bs));
1.1  mrg       w = vorrq_u8 (u, vceqq_u8 (data, repl_qm));
1.1  mrg       t = vandq_u8 (vorrq_u8 (v, w), xmask);
1.1  mrg       l = vpadd_u8 (vget_low_u8 (t), vget_high_u8 (t));
1.1  mrg       m = vpaddl_u8 (l);
1.1  mrg       n = vpaddl_u16 (m);
1.1  mrg       /* XMASK has a different single bit set in each byte of an 8-byte
1.1  mrg 	 half, so after the AND each matching byte of T holds only its
1.1  mrg 	 own bit and the pairwise adds above sum those bits without
1.1  mrg 	 carries.  Merge the two lanes of N into one value with one bit
1.1  mrg 	 per byte of the block, then drop the bytes that MASK excludes.  */
1.1  mrg       found = vget_lane_u32 ((uint32x2_t) vorr_u64 ((uint64x1_t) n,
1.1  mrg 	      vshr_n_u64 ((uint64x1_t) n, 24)), 0);
1.1  mrg       found &= mask;
1.1  mrg     }
1.1  mrg   while (!found);
1.1  mrg
1.1  mrg   /* FOUND contains 1 in bits for which we matched a relevant
1.1  mrg      character.  Conversion to the byte index is trivial: the lowest
1.1  mrg      set bit gives the offset of the first match from P.  */
1.1  mrg   found = __builtin_ctz (found);
1.1  mrg   return (const uchar *)p + found;
1.1  mrg }
1.1  mrg
1.1  mrg #else
1.1  mrg
1.1  mrg /* We only have one accelerated alternative.  Use a direct call so that
1.1  mrg    we encourage inlining.  This is the fallback branch of the
1.1  mrg    preprocessor conditional above: search_line_fast simply becomes
1.1  mrg    another name for search_line_acc_char.  */
1.1  mrg
1.1  mrg #define search_line_fast  search_line_acc_char
1.1  mrg
1.1  mrg #endif
1.1  mrg
1.1  mrg /* Initialize the lexer if needed.  This calls init_vectorized_lexer
1.1  mrg    when HAVE_init_vectorized_lexer is defined and does nothing
1.1  mrg    otherwise.  */
1.1  mrg
1.1  mrg void
1.1  mrg _cpp_init_lexer (void)
1.1  mrg {
1.1  mrg #ifdef HAVE_init_vectorized_lexer
1.1  mrg   init_vectorized_lexer ();
1.1  mrg #endif
1.1  mrg }
1.1  mrg
1.1  mrg /* Returns with a logical line that contains no escaped newlines or
1.1  mrg    trigraphs.  This is a time-critical inner loop.
1.1  mrg
1.1  mrg    The line starting at PFILE->buffer->next_line is cleaned in place:
1.1  mrg    escaped newlines are spliced out and, if the trigraphs option is
1.1  mrg    set, trigraphs are replaced, by copying the rest of the line down
1.1  mrg    over them.  On return buffer->cur and buffer->line_base point to
1.1  mrg    the start of the line, a '\n' has been stored at the end of the
1.1  mrg    cleaned text, and buffer->next_line points just past the terminator
1.1  mrg    that ended the line in the source text.  A line note is added for
1.1  mrg    every escaped newline and every trigraph (converted or not),
1.1  mrg    followed by a sentinel note.
1.1  mrg
1.1  mrg    If buffer->from_stage3 is set, only the end of the line is located;
1.1  mrg    escaped newlines and trigraphs are left alone.  */
1.1  mrg void
1.1  mrg _cpp_clean_line (cpp_reader *pfile)
1.1  mrg {
1.1  mrg   cpp_buffer *buffer;
1.1  mrg   const uchar *s;
1.1  mrg   uchar c, *d, *p;
1.1  mrg
1.1  mrg   buffer = pfile->buffer;
1.1  mrg   buffer->cur_note = buffer->notes_used = 0;
1.1  mrg   buffer->cur = buffer->line_base = buffer->next_line;
1.1  mrg   buffer->need_line = false;
1.1  mrg   s = buffer->next_line;
1.1  mrg
1.1  mrg   if (!buffer->from_stage3)
1.1  mrg     {
1.1  mrg       const uchar *pbackslash = NULL;
1.1  mrg
1.1  mrg       /* Fast path.  This is the common case of an un-escaped line with
1.1  mrg 	 no trigraphs.  The primary win here is by not writing any
1.1  mrg 	 data back to memory until we have to.  */
1.1  mrg       while (1)
1.1  mrg 	{
1.1  mrg 	  /* Perform an optimized search for \n, \r, \\, ?.  */
1.1  mrg 	  s = search_line_fast (s, buffer->rlimit);
1.1  mrg
1.1  mrg 	  c = *s;
1.1  mrg 	  if (c == '\\')
1.1  mrg 	    {
1.1  mrg 	      /* Record the location of the backslash and continue.  Only
1.1  mrg 		 the last one seen is kept; it is compared against the
1.1  mrg 		 end of the line below.  */
1.1  mrg 	      pbackslash = s++;
1.1  mrg 	    }
1.1  mrg 	  else if (__builtin_expect (c == '?', 0))
1.1  mrg 	    {
1.1  mrg 	      if (__builtin_expect (s[1] == '?', false)
1.1  mrg 		   && _cpp_trigraph_map[s[2]])
1.1  mrg 		{
1.1  mrg 		  /* Have a trigraph.  We may or may not have to convert
1.1  mrg 		     it.  Add a line note regardless, for -Wtrigraphs.  */
1.1  mrg 		  add_line_note (buffer, s, s[2]);
1.1  mrg 		  if (CPP_OPTION (pfile, trigraphs))
1.1  mrg 		    {
1.1  mrg 		      /* We do, and that means we have to switch to the
1.1  mrg 			 slow path.  Store the replacement over the first
1.1  mrg 			 '?' and leave S on the last character of the
1.1  mrg 			 trigraph; the slow path pre-increments both S
1.1  mrg 			 and D before each copy.  */
1.1  mrg 		      d = (uchar *) s;
1.1  mrg 		      *d = _cpp_trigraph_map[s[2]];
1.1  mrg 		      s += 2;
1.1  mrg 		      goto slow_path;
1.1  mrg 		    }
1.1  mrg 		}
1.1  mrg 	      /* Not a trigraph.  Continue on fast-path.  */
1.1  mrg 	      s++;
1.1  mrg 	    }
1.1  mrg 	  else
1.1  mrg 	    break;
1.1  mrg 	}
1.1  mrg
1.1  mrg       /* This must be \r or \n.  We're either done, or we'll be forced
1.1  mrg 	 to write back to the buffer and continue on the slow path.
1.1  mrg 	 D marks where the terminating '\n' is stored if we are done.  */
1.1  mrg       d = (uchar *) s;
1.1  mrg
1.1  mrg       if (__builtin_expect (s == buffer->rlimit, false))
1.1  mrg 	goto done;
1.1  mrg
1.1  mrg       /* DOS line ending? */
1.1  mrg       if (__builtin_expect (c == '\r', false) && s[1] == '\n')
1.1  mrg 	{
1.1  mrg 	  s++;
1.1  mrg 	  if (s == buffer->rlimit)
1.1  mrg 	    goto done;
1.1  mrg 	}
1.1  mrg
1.1  mrg       if (__builtin_expect (pbackslash == NULL, true))
1.1  mrg 	goto done;
1.1  mrg
1.1  mrg       /* Check for escaped newline.  Scan back from the line end over
1.1  mrg 	 characters accepted by is_nvspace.  No lower bound is tested:
1.1  mrg 	 PBACKSLASH is non-null here, so there is a backslash earlier in
1.1  mrg 	 this line for the scan to stop at (NOTE(review): this relies on
1.1  mrg 	 is_nvspace rejecting a backslash -- confirm).  The newline is
1.1  mrg 	 escaped only if the character reached is the last backslash
1.1  mrg 	 seen.  */
1.1  mrg       p = d;
1.1  mrg       while (is_nvspace (p[-1]))
1.1  mrg 	p--;
1.1  mrg       if (p - 1 != pbackslash)
1.1  mrg 	goto done;
1.1  mrg
1.1  mrg       /* Have an escaped newline; process it and proceed to
1.1  mrg 	 the slow path.  The note is typed ' ' if something separated
1.1  mrg 	 the backslash from the newline and '\\' otherwise.  D is backed
1.1  mrg 	 up so that the next store lands on the backslash, and next_line
1.1  mrg 	 is set to the backslash position, which the slow path uses as
1.1  mrg 	 the lower bound of its backward scans; it is set to its final
1.1  mrg 	 value at DONE.  */
1.1  mrg       add_line_note (buffer, p - 1, p != d ? ' ' : '\\');
1.1  mrg       d = p - 2;
1.1  mrg       buffer->next_line = p - 1;
1.1  mrg
1.1  mrg     slow_path:
1.1  mrg       while (1)
1.1  mrg 	{
1.1  mrg 	  c = *++s;
1.1  mrg 	  *++d = c;
1.1  mrg
1.1  mrg 	  if (c == '\n' || c == '\r')
1.1  mrg 	    {
1.1  mrg 	      /* Handle DOS line endings.  */
1.1  mrg 	      if (c == '\r' && s != buffer->rlimit && s[1] == '\n')
1.1  mrg 		s++;
1.1  mrg 	      if (s == buffer->rlimit)
1.1  mrg 		break;
1.1  mrg
1.1  mrg 	      /* Escaped?  Unlike the fast path, the backward scan here is
1.1  mrg 		 bounded by next_line, the position of the previous
1.1  mrg 		 splice.  */
1.1  mrg 	      p = d;
1.1  mrg 	      while (p != buffer->next_line && is_nvspace (p[-1]))
1.1  mrg 		p--;
1.1  mrg 	      if (p == buffer->next_line || p[-1] != '\\')
1.1  mrg 		break;
1.1  mrg
1.1  mrg 	      add_line_note (buffer, p - 1, p != d ? ' ': '\\');
1.1  mrg 	      d = p - 2;
1.1  mrg 	      buffer->next_line = p - 1;
1.1  mrg 	    }
1.1  mrg 	  else if (c == '?' && s[1] == '?' && _cpp_trigraph_map[s[2]])
1.1  mrg 	    {
1.1  mrg 	      /* Add a note regardless, for the benefit of -Wtrigraphs.
1.1  mrg 		 The note position is D, i.e. where the character ends up
1.1  mrg 		 in the cleaned line.  */
1.1  mrg 	      add_line_note (buffer, d, s[2]);
1.1  mrg 	      if (CPP_OPTION (pfile, trigraphs))
1.1  mrg 		{
1.1  mrg 		  *d = _cpp_trigraph_map[s[2]];
1.1  mrg 		  s += 2;
1.1  mrg 		}
1.1  mrg 	    }
1.1  mrg 	}
1.1  mrg     }
1.1  mrg   else
1.1  mrg     {
1.1  mrg       while (*s != '\n' && *s != '\r')
1.1  mrg 	s++;
1.1  mrg       d = (uchar *) s;
1.1  mrg
1.1  mrg       /* Handle DOS line endings.  */
1.1  mrg       if (*s == '\r' && s + 1 != buffer->rlimit && s[1] == '\n')
1.1  mrg 	s++;
1.1  mrg     }
1.1  mrg
1.1  mrg  done:
1.1  mrg   *d = '\n';
1.1  mrg   /* A sentinel note that should never be processed.  */
1.1  mrg   add_line_note (buffer, d + 1, '\n');
1.1  mrg   buffer->next_line = s + 1;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return true if the trigraph indicated by NOTE should be warned
1.1  mrg    about in a comment.  NOTE must be followed by another note in the
1.1  mrg    same array, since note[1] is examined.  */
1.1  mrg static bool
1.1  mrg warn_in_comment (cpp_reader *pfile, _cpp_line_note *note)
1.1  mrg {
1.1  mrg   const uchar *p;
1.1  mrg
1.1  mrg   /* Within comments we don't warn about trigraphs, unless the
1.1  mrg      trigraph forms an escaped newline, as that may change
1.1  mrg      behavior.  The note type is the third character of the
1.1  mrg      trigraph, so '/' selects the trigraph that stands for a
1.1  mrg      backslash.  */
1.1  mrg   if (note->type != '/')
1.1  mrg     return false;
1.1  mrg
1.1  mrg   /* If -trigraphs, then this was an escaped newline iff the next note
1.1  mrg      is coincident.  */
1.1  mrg   if (CPP_OPTION (pfile, trigraphs))
1.1  mrg     return note[1].pos == note->pos;
1.1  mrg
1.1  mrg   /* Otherwise, see if this forms an escaped newline.  The trigraph
1.1  mrg      was left unconverted, so step over its three characters and then
1.1  mrg      over anything is_nvspace accepts.  */
1.1  mrg   p = note->pos + 3;
1.1  mrg   while (is_nvspace (*p))
1.1  mrg     p++;
1.1  mrg
1.1  mrg   /* There might have been escaped newlines between the trigraph and the
1.1  mrg      newline we found.  Hence the position test.  */
1.1  mrg   return (*p == '\n' && p < note[1].pos);
1.1  mrg }
1.1  mrg
1.1  mrg /* Process the notes created by add_line_note as far as the current
1.1  mrg    location.  Notes are consumed from buffer->cur_note onwards until
1.1  mrg    one is found whose position lies beyond buffer->cur.
1.1  mrg
1.1  mrg    IN_COMMENT is nonzero when the caller is skipping a comment: it
1.1  mrg    suppresses the "backslash and newline separated by space" warning
1.1  mrg    and restricts trigraph warnings to those warn_in_comment accepts.
1.1  mrg
1.1  mrg    A note of type '\\' or ' ' marks an escaped newline; for it
1.1  mrg    buffer->line_base is moved to the note position and
1.1  mrg    CPP_INCREMENT_LINE is invoked.  A note whose type is a trigraph
1.1  mrg    character may produce a -Wtrigraphs warning.  A note of type 0 is
1.1  mrg    skipped, and any other type aborts.  */
1.1  mrg void
1.1  mrg _cpp_process_line_notes (cpp_reader *pfile, int in_comment)
1.1  mrg {
1.1  mrg   cpp_buffer *buffer = pfile->buffer;
1.1  mrg
1.1  mrg   for (;;)
1.1  mrg     {
1.1  mrg       _cpp_line_note *note = &buffer->notes[buffer->cur_note];
1.1  mrg       unsigned int col;
1.1  mrg
1.1  mrg       if (note->pos > buffer->cur)
1.1  mrg 	break;
1.1  mrg
1.1  mrg       buffer->cur_note++;
1.1  mrg       col = CPP_BUF_COLUMN (buffer, note->pos + 1);
1.1  mrg
1.1  mrg       if (note->type == '\\' || note->type == ' ')
1.1  mrg 	{
1.1  mrg 	  if (note->type == ' ' && !in_comment)
1.1  mrg 	    cpp_error_with_line (pfile, CPP_DL_WARNING, pfile->line_table->highest_line, col,
1.1  mrg 				 "backslash and newline separated by space");
1.1  mrg
1.1  mrg 	  if (buffer->next_line > buffer->rlimit)
1.1  mrg 	    {
1.1  mrg 	      cpp_error_with_line (pfile, CPP_DL_PEDWARN, pfile->line_table->highest_line, col,
1.1  mrg 				   "backslash-newline at end of file");
1.1  mrg 	      /* Prevent "no newline at end of file" warning.  */
1.1  mrg 	      buffer->next_line = buffer->rlimit;
1.1  mrg 	    }
1.1  mrg
1.1  mrg 	  buffer->line_base = note->pos;
1.1  mrg 	  CPP_INCREMENT_LINE (pfile, 0);
1.1  mrg 	}
1.1  mrg       else if (_cpp_trigraph_map[note->type])
1.1  mrg 	{
1.1  mrg 	  if (CPP_OPTION (pfile, warn_trigraphs)
1.1  mrg 	      && (!in_comment || warn_in_comment (pfile, note)))
1.1  mrg 	    {
1.1  mrg 	      if (CPP_OPTION (pfile, trigraphs))
1.1  mrg 		cpp_warning_with_line (pfile, CPP_W_TRIGRAPHS,
1.1  mrg                                        pfile->line_table->highest_line, col,
1.1  mrg 				       "trigraph ??%c converted to %c",
1.1  mrg 				       note->type,
1.1  mrg 				       (int) _cpp_trigraph_map[note->type]);
1.1  mrg 	      else
1.1  mrg 		{
1.1  mrg 		  cpp_warning_with_line
1.1  mrg 		    (pfile, CPP_W_TRIGRAPHS,
1.1  mrg                      pfile->line_table->highest_line, col,
1.1  mrg 		     "trigraph ??%c ignored, use -trigraphs to enable",
1.1  mrg 		     note->type);
1.1  mrg 		}
1.1  mrg 	    }
1.1  mrg 	}
1.1  mrg       else if (note->type == 0)
1.1  mrg 	/* Already processed in lex_raw_string.  */;
1.1  mrg       else
1.1  mrg 	abort ();
1.1  mrg     }
1.1  mrg }
1.1  mrg /* Bookkeeping for Unicode bidirectional control characters seen while
1.1  mrg    lexing.  KIND names the characters that are recognized (to_str
1.1  mrg    gives their code points), and VEC is the stack of contexts they
1.1  mrg    have opened and that have not been closed yet.  */
1.1  mrg namespace bidi {
1.1  mrg   enum class kind {
1.1  mrg     NONE, LRE, RLE, LRO, RLO, LRI, RLI, FSI, PDF, PDI, LTR, RTL
1.1  mrg   };
1.1  mrg
1.1  mrg   /* All the UTF-8 encodings of bidi characters start with E2.  */
1.1  mrg   constexpr uchar utf8_start = 0xe2;
1.1  mrg
1.1  mrg   struct context
1.1  mrg   {
1.1  mrg     context () {}	/* Leaves every member uninitialized.  */
1.1  mrg     context (location_t loc, kind k, bool pdf, bool ucn)
1.1  mrg     : m_loc (loc), m_kind (k), m_pdf (pdf), m_ucn (ucn)
1.1  mrg     {
1.1  mrg     }
1.1  mrg
1.1  mrg     kind get_pop_kind () const
1.1  mrg     {
1.1  mrg       return m_pdf ? kind::PDF : kind::PDI;
1.1  mrg     }
1.1  mrg     bool ucn_p () const
1.1  mrg     {
1.1  mrg       return m_ucn;
1.1  mrg     }
1.1  mrg
1.1  mrg     location_t m_loc;	/* Location of the opening character.  */
1.1  mrg     kind m_kind;	/* The character that opened the context.  */
1.1  mrg     unsigned m_pdf : 1;	/* Set if closed by PDF, clear if by PDI.  */
1.1  mrg     unsigned m_ucn : 1;	/* Set if the opener was written as a UCN.  */
1.1  mrg   };
1.1  mrg
1.1  mrg   /* A vector holding currently open bidi contexts, most recently
1.1  mrg      opened last.  Each element is a context: it records the location
1.1  mrg      and kind of the opening character, whether the context is closed
1.1  mrg      by PDF (m_pdf set) or by PDI (m_pdf clear), and whether the
1.1  mrg      opening character was written as a UCN (m_ucn set) or in UTF-8
1.1  mrg      (m_ucn clear).  */
1.1  mrg   semi_embedded_vec <context, 16> vec;
1.1  mrg
1.1  mrg   /* Close the whole comment/identifier/string literal/character constant
1.1  mrg      context.  */
1.1  mrg   void on_close ()
1.1  mrg   {
1.1  mrg     vec.truncate (0);
1.1  mrg   }
1.1  mrg
1.1  mrg   /* Pop the last element in the vector.  */
1.1  mrg   void pop ()
1.1  mrg   {
1.1  mrg     unsigned int len = vec.count ();
1.1  mrg     gcc_checking_assert (len > 0);
1.1  mrg     vec.truncate (len - 1);
1.1  mrg   }
1.1  mrg
1.1  mrg   /* Return the pop kind of the context of the Ith element.  */
1.1  mrg   kind pop_kind_at (unsigned int i)
1.1  mrg   {
1.1  mrg     return vec[i].get_pop_kind ();
1.1  mrg   }
1.1  mrg
1.1  mrg   /* Return the pop kind of the context that is currently opened, or
1.1  mrg      kind::NONE if no context is open.  */
1.1  mrg   kind current_ctx ()
1.1  mrg   {
1.1  mrg     unsigned int len = vec.count ();
1.1  mrg     if (len == 0)
1.1  mrg       return kind::NONE;
1.1  mrg     return vec[len - 1].get_pop_kind ();
1.1  mrg   }
1.1  mrg
1.1  mrg   /* Return true if the current context comes from a UCN origin, that is,
1.1  mrg      the bidi char which started this bidi context was written as a UCN.
1.1  mrg      The vector must not be empty.  */
1.1  mrg   bool current_ctx_ucn_p ()
1.1  mrg   {
1.1  mrg     unsigned int len = vec.count ();
1.1  mrg     gcc_checking_assert (len > 0);
1.1  mrg     return vec[len - 1].m_ucn;
1.1  mrg   }
1.1  mrg   /* Return the location recorded for the context that is currently
1.1  mrg      opened.  The vector must not be empty.  */
1.1  mrg   location_t current_ctx_loc ()
1.1  mrg   {
1.1  mrg     unsigned int len = vec.count ();
1.1  mrg     gcc_checking_assert (len > 0);
1.1  mrg     return vec[len - 1].m_loc;
1.1  mrg   }
1.1  mrg
1.1  mrg   /* We've read a bidi char, update the current vector as necessary.
1.1  mrg      LOC is only valid when K is not kind::NONE.  UCN_P says whether
1.1  mrg      the character was written as a UCN.  Opening characters push a
1.1  mrg      new context.  PDF pops the last context only if that context is
1.1  mrg      one closed by PDF.  PDI truncates the vector at the last context
1.1  mrg      closed by PDI, which also drops everything opened after it.  A
1.1  mrg      PDF or PDI that matches nothing leaves the vector unchanged.  */
1.1  mrg   void on_char (kind k, bool ucn_p, location_t loc)
1.1  mrg   {
1.1  mrg     switch (k)
1.1  mrg       {
1.1  mrg       case kind::LRE:
1.1  mrg       case kind::RLE:
1.1  mrg       case kind::LRO:
1.1  mrg       case kind::RLO:
1.1  mrg 	vec.push (context (loc, k, true, ucn_p));
1.1  mrg 	break;
1.1  mrg       case kind::LRI:
1.1  mrg       case kind::RLI:
1.1  mrg       case kind::FSI:
1.1  mrg 	vec.push (context (loc, k, false, ucn_p));
1.1  mrg 	break;
1.1  mrg       /* PDF terminates the scope of the last LRE, RLE, LRO, or RLO
1.1  mrg 	 whose scope has not yet been terminated.  */
1.1  mrg       case kind::PDF:
1.1  mrg 	if (current_ctx () == kind::PDF)
1.1  mrg 	  pop ();
1.1  mrg 	break;
1.1  mrg       /* PDI terminates the scope of the last LRI, RLI, or FSI whose
1.1  mrg 	 scope has not yet been terminated, as well as the scopes of
1.1  mrg 	 any subsequent LREs, RLEs, LROs, or RLOs whose scopes have not
1.1  mrg 	 yet been terminated.  */
1.1  mrg       case kind::PDI:
1.1  mrg 	for (int i = vec.count () - 1; i >= 0; --i)
1.1  mrg 	  if (pop_kind_at (i) == kind::PDI)
1.1  mrg 	    {
1.1  mrg 	      vec.truncate (i);
1.1  mrg 	      break;
1.1  mrg 	    }
1.1  mrg 	break;
1.1  mrg       case kind::LTR:
1.1  mrg       case kind::RTL:
1.1  mrg 	/* These aren't popped by a PDF/PDI.  */
1.1  mrg 	break;
1.1  mrg       ATTR_LIKELY case kind::NONE:
1.1  mrg 	break;
1.1  mrg       default:
1.1  mrg 	abort ();
1.1  mrg       }
1.1  mrg   }
1.1  mrg
1.1  mrg   /* Return a descriptive string for K.  K must name an actual bidi
1.1  mrg      character: kind::NONE, like any other unlisted value, aborts.  */
1.1  mrg   const char *to_str (kind k)
1.1  mrg   {
1.1  mrg     switch (k)
1.1  mrg       {
1.1  mrg       case kind::LRE:
1.1  mrg 	return "U+202A (LEFT-TO-RIGHT EMBEDDING)";
1.1  mrg       case kind::RLE:
1.1  mrg 	return "U+202B (RIGHT-TO-LEFT EMBEDDING)";
1.1  mrg       case kind::LRO:
1.1  mrg 	return "U+202D (LEFT-TO-RIGHT OVERRIDE)";
1.1  mrg       case kind::RLO:
1.1  mrg 	return "U+202E (RIGHT-TO-LEFT OVERRIDE)";
1.1  mrg       case kind::LRI:
1.1  mrg 	return "U+2066 (LEFT-TO-RIGHT ISOLATE)";
1.1  mrg       case kind::RLI:
1.1  mrg 	return "U+2067 (RIGHT-TO-LEFT ISOLATE)";
1.1  mrg       case kind::FSI:
1.1  mrg 	return "U+2068 (FIRST STRONG ISOLATE)";
1.1  mrg       case kind::PDF:
1.1  mrg 	return "U+202C (POP DIRECTIONAL FORMATTING)";
1.1  mrg       case kind::PDI:
1.1  mrg 	return "U+2069 (POP DIRECTIONAL ISOLATE)";
1.1  mrg       case kind::LTR:
1.1  mrg 	return "U+200E (LEFT-TO-RIGHT MARK)";
1.1  mrg       case kind::RTL:
1.1  mrg 	return "U+200F (RIGHT-TO-LEFT MARK)";
1.1  mrg       default:
1.1  mrg 	abort ();
1.1  mrg       }
1.1  mrg   }
1.1  mrg }
1.1  mrg
1.1  mrg /* Get location_t for the range of bytes [START, START + NUM_BYTES)
1.1  mrg    within the current line in PFILE, with the caret at START.
1.1  mrg    NUM_BYTES must be nonzero.  If the first and last byte map to the
1.1  mrg    same location_t, that location is returned as is; otherwise the
1.1  mrg    result is built with COMBINE_LOCATION_DATA from the start location
1.1  mrg    and the start/finish range.  */
1.1  mrg
1.1  mrg static location_t
1.1  mrg get_location_for_byte_range_in_cur_line (cpp_reader *pfile,
1.1  mrg 					 const unsigned char *const start,
1.1  mrg 					 size_t num_bytes)
1.1  mrg {
1.1  mrg   gcc_checking_assert (num_bytes > 0);
1.1  mrg
1.1  mrg   /* CPP_BUF_COLUMN and linemap_position_for_column both refer
1.1  mrg      to offsets in bytes, but CPP_BUF_COLUMN is 0-based,
1.1  mrg      whereas linemap_position_for_column is 1-based.  */
1.1  mrg
1.1  mrg   /* Get 0-based offsets within the line.  END_OFFSET is the offset of
1.1  mrg      the last byte of the range, not one past it.  */
1.1  mrg   size_t start_offset = CPP_BUF_COLUMN (pfile->buffer, start);
1.1  mrg   size_t end_offset = start_offset + num_bytes - 1;
1.1  mrg
1.1  mrg   /* Now convert to location_t, where "columns" are 1-based byte offsets.  */
1.1  mrg   location_t start_loc = linemap_position_for_column (pfile->line_table,
1.1  mrg 						      start_offset + 1);
1.1  mrg   location_t end_loc = linemap_position_for_column (pfile->line_table,
1.1  mrg 						     end_offset + 1);
1.1  mrg
1.1  mrg   if (start_loc == end_loc)
1.1  mrg     return start_loc;
1.1  mrg
1.1  mrg   source_range src_range;
1.1  mrg   src_range.m_start = start_loc;
1.1  mrg   src_range.m_finish = end_loc;
1.1  mrg   location_t combined_loc = COMBINE_LOCATION_DATA (pfile->line_table,
1.1  mrg 						   start_loc,
1.1  mrg 						   src_range,
1.1  mrg 						   NULL);
1.1  mrg   return combined_loc;
1.1  mrg }
1.1  mrg
1.1  mrg /* Parse a sequence of 3 bytes starting with P and return its bidi code.
1.1  mrg    P[0] must be bidi::utf8_start (0xE2).  The sequences matched are
1.1  mrg    E2 80 8E..8F (U+200E..U+200F), E2 80 AA..AE (U+202A..U+202E) and
1.1  mrg    E2 81 A6..A9 (U+2066..U+2069); anything else yields
1.1  mrg    bidi::kind::NONE.  */
1.1  mrg
1.1  mrg static bidi::kind
1.1  mrg get_bidi_utf8_1 (const unsigned char *const p)
1.1  mrg {
1.1  mrg   gcc_checking_assert (p[0] == bidi::utf8_start);
1.1  mrg
1.1  mrg   if (p[1] == 0x80)
1.1  mrg     switch (p[2])
1.1  mrg       {
1.1  mrg       case 0xaa:
1.1  mrg 	return bidi::kind::LRE;
1.1  mrg       case 0xab:
1.1  mrg 	return bidi::kind::RLE;
1.1  mrg       case 0xac:
1.1  mrg 	return bidi::kind::PDF;
1.1  mrg       case 0xad:
1.1  mrg 	return bidi::kind::LRO;
1.1  mrg       case 0xae:
1.1  mrg 	return bidi::kind::RLO;
1.1  mrg       case 0x8e:
1.1  mrg 	return bidi::kind::LTR;
1.1  mrg       case 0x8f:
1.1  mrg 	return bidi::kind::RTL;
1.1  mrg       default:
1.1  mrg 	break;
1.1  mrg       }
1.1  mrg   else if (p[1] == 0x81)
1.1  mrg     switch (p[2])
1.1  mrg       {
1.1  mrg       case 0xa6:
1.1  mrg 	return bidi::kind::LRI;
1.1  mrg       case 0xa7:
1.1  mrg 	return bidi::kind::RLI;
1.1  mrg       case 0xa8:
1.1  mrg 	return bidi::kind::FSI;
1.1  mrg       case 0xa9:
1.1  mrg 	return bidi::kind::PDI;
1.1  mrg       default:
1.1  mrg 	break;
1.1  mrg       }
1.1  mrg
1.1  mrg   return bidi::kind::NONE;
1.1  mrg }
1.1  mrg
1.1  mrg /* Parse a sequence of 3 bytes starting with P and return its bidi code.
1.1  mrg    If the kind is not NONE, write the location to *OUT; the location
1.1  mrg    covers the 3 bytes at P on the current line of PFILE.  *OUT is not
1.1  mrg    written when the result is NONE.  */
1.1  mrg
1.1  mrg static bidi::kind
1.1  mrg get_bidi_utf8 (cpp_reader *pfile, const unsigned char *const p, location_t *out)
1.1  mrg {
1.1  mrg   bidi::kind result = get_bidi_utf8_1 (p);
1.1  mrg   if (result != bidi::kind::NONE)
1.1  mrg     {
1.1  mrg       /* We have a sequence of 3 bytes starting at P.  */
1.1  mrg       *out = get_location_for_byte_range_in_cur_line (pfile, p, 3);
1.1  mrg     }
1.1  mrg   return result;
1.1  mrg }
1.1  mrg
1.1  mrg /* Parse a UCN where P points just past \u or \U and return its bidi code.
1.1  mrg    IS_U is true for the eight-digit \U form and false for the
1.1  mrg    four-digit \u form.  The hex digits A-F are accepted in either
1.1  mrg    case.  Returns bidi::kind::NONE if the digits do not name one of
1.1  mrg    the recognized characters.  */
1.1  mrg
1.1  mrg static bidi::kind
1.1  mrg get_bidi_ucn_1 (const unsigned char *p, bool is_U)
1.1  mrg {
1.1  mrg   /* 6.4.3 Universal Character Names
1.1  mrg       \u hex-quad
1.1  mrg       \U hex-quad hex-quad
1.1  mrg      where \unnnn means \U0000nnnn.  */
1.1  mrg
1.1  mrg   if (is_U)
1.1  mrg     {
1.1  mrg       if (p[0] != '0' || p[1] != '0' || p[2] != '0' || p[3] != '0')
1.1  mrg 	return bidi::kind::NONE;
1.1  mrg       /* Skip 4B so we can treat \u and \U the same below.  */
1.1  mrg       p += 4;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* All code points we are looking for start with 20xx.  The third
1.1  mrg      digit then selects the group: 2 for U+202A..U+202E, 6 for
1.1  mrg      U+2066..U+2069 and 0 for U+200E..U+200F.  */
1.1  mrg   if (p[0] != '2' || p[1] != '0')
1.1  mrg     return bidi::kind::NONE;
1.1  mrg   else if (p[2] == '2')
1.1  mrg     switch (p[3])
1.1  mrg       {
1.1  mrg       case 'a':
1.1  mrg       case 'A':
1.1  mrg 	return bidi::kind::LRE;
1.1  mrg       case 'b':
1.1  mrg       case 'B':
1.1  mrg 	return bidi::kind::RLE;
1.1  mrg       case 'c':
1.1  mrg       case 'C':
1.1  mrg 	return bidi::kind::PDF;
1.1  mrg       case 'd':
1.1  mrg       case 'D':
1.1  mrg 	return bidi::kind::LRO;
1.1  mrg       case 'e':
1.1  mrg       case 'E':
1.1  mrg 	return bidi::kind::RLO;
1.1  mrg       default:
1.1  mrg 	break;
1.1  mrg       }
1.1  mrg   else if (p[2] == '6')
1.1  mrg     switch (p[3])
1.1  mrg       {
1.1  mrg       case '6':
1.1  mrg 	return bidi::kind::LRI;
1.1  mrg       case '7':
1.1  mrg 	return bidi::kind::RLI;
1.1  mrg       case '8':
1.1  mrg 	return bidi::kind::FSI;
1.1  mrg       case '9':
1.1  mrg 	return bidi::kind::PDI;
1.1  mrg       default:
1.1  mrg 	break;
1.1  mrg       }
1.1  mrg   else if (p[2] == '0')
1.1  mrg     switch (p[3])
1.1  mrg       {
1.1  mrg       case 'e':
1.1  mrg       case 'E':
1.1  mrg 	return bidi::kind::LTR;
1.1  mrg       case 'f':
1.1  mrg       case 'F':
1.1  mrg 	return bidi::kind::RTL;
1.1  mrg       default:
1.1  mrg 	break;
1.1  mrg       }
1.1  mrg
1.1  mrg   return bidi::kind::NONE;
1.1  mrg }
1.1  mrg
1.1  mrg /* Parse a UCN where P points just past \u or \U and return its bidi code.
1.1  mrg    If the kind is not NONE, write the location to *OUT.  The location
1.1  mrg    starts two bytes before P, at the backslash, and spans the whole
1.1  mrg    UCN: 6 bytes for \u and 10 bytes for \U.  *OUT is not written when
1.1  mrg    the result is NONE.  */
1.1  mrg
1.1  mrg static bidi::kind
1.1  mrg get_bidi_ucn (cpp_reader *pfile,  const unsigned char *p, bool is_U,
1.1  mrg 	      location_t *out)
1.1  mrg {
1.1  mrg   bidi::kind result = get_bidi_ucn_1 (p, is_U);
1.1  mrg   if (result != bidi::kind::NONE)
1.1  mrg     {
1.1  mrg       const unsigned char *start = p - 2;
1.1  mrg       size_t num_bytes = 2 + (is_U ? 8 : 4);
1.1  mrg       *out = get_location_for_byte_range_in_cur_line (pfile, start, num_bytes);
1.1  mrg     }
1.1  mrg   return result;
1.1  mrg }
1.1  mrg
1.1  mrg /* Subclass of rich_location for reporting on unpaired UTF-8
1.1  mrg    bidirectional control character(s).
1.1  mrg    Escape the source lines on output, and show all unclosed
1.1  mrg    bidi context, labelling everything.
1.1  mrg
1.1  mrg    The primary range is the LOC given to the constructor; one further
1.1  mrg    range, shown without a caret, is added for each element of
1.1  mrg    bidi::vec.  The labels are looked up in bidi::vec when they are
1.1  mrg    requested, so bidi::vec must still hold the same contexts while the
1.1  mrg    diagnostic is being emitted.  */
1.1  mrg
1.1  mrg class unpaired_bidi_rich_location : public rich_location
1.1  mrg {
1.1  mrg  public:
1.1  mrg   class custom_range_label : public range_label
1.1  mrg   {
1.1  mrg    public:
1.1  mrg      label_text get_text (unsigned range_idx) const FINAL OVERRIDE
1.1  mrg      {
1.1  mrg        /* range 0 is the primary location; each subsequent range i + 1
1.1  mrg 	  is for bidi::vec[i].  */
1.1  mrg        if (range_idx > 0)
1.1  mrg 	 {
1.1  mrg 	   const bidi::context &ctxt (bidi::vec[range_idx - 1]);
1.1  mrg 	   return label_text::borrow (bidi::to_str (ctxt.m_kind));
1.1  mrg 	 }
1.1  mrg        else
1.1  mrg 	 return label_text::borrow (_("end of bidirectional context"));
1.1  mrg      }
1.1  mrg   };
1.1  mrg
1.1  mrg   unpaired_bidi_rich_location (cpp_reader *pfile, location_t loc)
1.1  mrg   : rich_location (pfile->line_table, loc, &m_custom_label)
1.1  mrg   {
1.1  mrg     set_escape_on_output (true);
1.1  mrg     for (unsigned i = 0; i < bidi::vec.count (); i++)
1.1  mrg       add_range (bidi::vec[i].m_loc,
1.1  mrg 		 SHOW_RANGE_WITHOUT_CARET,
1.1  mrg 		 &m_custom_label);
1.1  mrg   }
1.1  mrg
1.1  mrg  private:
1.1  mrg    custom_range_label m_custom_label;
1.1  mrg };
1.1  mrg
1.1  mrg /* We're closing a bidi context, that is, we've encountered a newline,
1.1  mrg    are closing a C-style comment, or are at the end of a string literal,
1.1  mrg    character constant, or identifier.  Warn if this context was not
1.1  mrg    properly terminated by a PDI or PDF.  P points to the last character
1.1  mrg    in this context.
1.1  mrg
1.1  mrg    The warning is given only if at least one context is still open,
1.1  mrg    bidirectional_unpaired is set in the cpp_warn_bidirectional option,
1.1  mrg    and either the most recently opened context came from a UTF-8
1.1  mrg    character or bidirectional_ucn is set as well.  The vector of open
1.1  mrg    contexts is emptied whether or not a warning is given.  */
1.1  mrg
1.1  mrg static void
1.1  mrg maybe_warn_bidi_on_close (cpp_reader *pfile, const uchar *p)
1.1  mrg {
1.1  mrg   const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
1.1  mrg   if (bidi::vec.count () > 0
1.1  mrg       && (warn_bidi & bidirectional_unpaired
1.1  mrg 	  && (!bidi::current_ctx_ucn_p ()
1.1  mrg 	      || (warn_bidi & bidirectional_ucn))))
1.1  mrg     {
1.1  mrg       const location_t loc
1.1  mrg 	= linemap_position_for_column (pfile->line_table,
1.1  mrg 				       CPP_BUF_COLUMN (pfile->buffer, p));
1.1  mrg       unpaired_bidi_rich_location rich_loc (pfile, loc);
1.1  mrg       /* cpp_callbacks doesn't yet have a way to handle singular vs plural
1.1  mrg 	 forms of a diagnostic, so fake it for now.  */
1.1  mrg       if (bidi::vec.count () > 1)
1.1  mrg 	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1.1  mrg 			"unpaired UTF-8 bidirectional control characters "
1.1  mrg 			"detected");
1.1  mrg       else
1.1  mrg 	cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1.1  mrg 			"unpaired UTF-8 bidirectional control character "
1.1  mrg 			"detected");
1.1  mrg     }
1.1  mrg   /* We're done with this context.  */
1.1  mrg   bidi::on_close ();
1.1  mrg }
1.1  mrg
1.1  mrg /* We're at the beginning or in the middle of an identifier/comment/string
1.1  mrg    literal/character constant.  Warn if we've encountered a bidi character.
1.1  mrg    KIND says which bidi control character it was; UCN_P is true iff this bidi
1.1  mrg    control character was written as a UCN.  LOC is the location of the
1.1  mrg    character, but is only valid if KIND != bidi::kind::NONE.
1.1  mrg
1.1  mrg    Nothing is done for kind::NONE.  Otherwise, whether or not a
1.1  mrg    warning is given, the character is finally passed to bidi::on_char
1.1  mrg    so that the vector of open contexts is kept up to date.  */
1.1  mrg
1.1  mrg static void
1.1  mrg maybe_warn_bidi_on_char (cpp_reader *pfile, bidi::kind kind,
1.1  mrg 			 bool ucn_p, location_t loc)
1.1  mrg {
1.1  mrg   if (__builtin_expect (kind == bidi::kind::NONE, 1))
1.1  mrg     return;
1.1  mrg
1.1  mrg   const auto warn_bidi = CPP_OPTION (pfile, cpp_warn_bidirectional);
1.1  mrg
1.1  mrg   if (warn_bidi & (bidirectional_unpaired|bidirectional_any))
1.1  mrg     {
1.1  mrg       rich_location rich_loc (pfile->line_table, loc);
1.1  mrg       rich_loc.set_escape_on_output (true);
1.1  mrg
1.1  mrg       /* It seems excessive to warn about a PDI/PDF that is closing
1.1  mrg 	 an opened context because we've already warned about the
1.1  mrg 	 opening character.  Except warn when we have a UCN x UTF-8
1.1  mrg 	 mismatch, if UCN checking is enabled.  The mismatch warning
1.1  mrg 	 is given only when the option value is exactly
1.1  mrg 	 bidirectional_unpaired|bidirectional_ucn.  */
1.1  mrg       if (kind == bidi::current_ctx ())
1.1  mrg 	{
1.1  mrg 	  if (warn_bidi == (bidirectional_unpaired|bidirectional_ucn)
1.1  mrg 	      && bidi::current_ctx_ucn_p () != ucn_p)
1.1  mrg 	    {
1.1  mrg 	      rich_loc.add_range (bidi::current_ctx_loc ());
1.1  mrg 	      cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1.1  mrg 			      "UTF-8 vs UCN mismatch when closing "
1.1  mrg 			      "a context by \"%s\"", bidi::to_str (kind));
1.1  mrg 	    }
1.1  mrg 	}
1.1  mrg       else if (warn_bidi & bidirectional_any
1.1  mrg 	       && (!ucn_p || (warn_bidi & bidirectional_ucn)))
1.1  mrg 	{
1.1  mrg 	  if (kind == bidi::kind::PDF || kind == bidi::kind::PDI)
1.1  mrg 	    cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1.1  mrg 			    "\"%s\" is closing an unopened context",
1.1  mrg 			    bidi::to_str (kind));
1.1  mrg 	  else
1.1  mrg 	    cpp_warning_at (pfile, CPP_W_BIDIRECTIONAL, &rich_loc,
1.1  mrg 			    "found problematic Unicode character \"%s\"",
1.1  mrg 			    bidi::to_str (kind));
1.1  mrg 	}
1.1  mrg     }
1.1  mrg   /* Record the character: this pushes or pops an entry in the vector
1.1  mrg      of open contexts as appropriate for KIND.  */
1.1  mrg   bidi::on_char (kind, ucn_p, loc);
1.1  mrg }
1.1  mrg
1.1  mrg /* Skip a C-style block comment.  We find the end of the comment by
1.1  mrg    seeing if an asterisk is before every '/' we encounter.  Returns
1.1  mrg    nonzero if comment terminated by EOF, zero otherwise.
1.1  mrg
1.1  mrg    Buffer->cur points to the initial asterisk of the comment.  If the
1.1  mrg    character right after that asterisk is a slash it is skipped too,
1.1  mrg    so that the opening asterisk is never taken as part of the
1.1  mrg    terminator.  On a normal return buffer->cur points just past the
1.1  mrg    closing slash.
1.1  mrg
1.1  mrg    At each newline the pending line notes are processed and the next
1.1  mrg    line is fetched with _cpp_clean_line; EOF is reported if
1.1  mrg    buffer->next_line has reached buffer->rlimit at that point.  When
1.1  mrg    bidi warnings are enabled, bidi control characters written in UTF-8
1.1  mrg    are tracked, and contexts still open are reported at every newline
1.1  mrg    and at the end of the comment.  */
1.1  mrg bool
1.1  mrg _cpp_skip_block_comment (cpp_reader *pfile)
1.1  mrg {
1.1  mrg   cpp_buffer *buffer = pfile->buffer;
1.1  mrg   const uchar *cur = buffer->cur;
1.1  mrg   uchar c;
1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
1.1  mrg
1.1  mrg   cur++;
1.1  mrg   if (*cur == '/')
1.1  mrg     cur++;
1.1  mrg
1.1  mrg   for (;;)
1.1  mrg     {
1.1  mrg       /* People like decorating comments with '*', so check for '/'
1.1  mrg 	 instead for efficiency.  CUR is advanced past C here, so
1.1  mrg 	 cur[-1] is C and cur[-2] is the character before it.  */
1.1  mrg       c = *cur++;
1.1  mrg
1.1  mrg       if (c == '/')
1.1  mrg 	{
1.1  mrg 	  if (cur[-2] == '*')
1.1  mrg 	    {
1.1  mrg 	      if (warn_bidi_p)
1.1  mrg 		maybe_warn_bidi_on_close (pfile, cur);
1.1  mrg 	      break;
1.1  mrg 	    }
1.1  mrg
1.1  mrg 	  /* Warn about potential nested comments, but not if the '/'
1.1  mrg 	     comes immediately before the true comment delimiter.
1.1  mrg 	     Don't bother to get it right across escaped newlines.  */
1.1  mrg 	  if (CPP_OPTION (pfile, warn_comments)
1.1  mrg 	      && cur[0] == '*' && cur[1] != '/')
1.1  mrg 	    {
1.1  mrg 	      buffer->cur = cur;
1.1  mrg 	      cpp_warning_with_line (pfile, CPP_W_COMMENTS,
1.1  mrg 				     pfile->line_table->highest_line,
1.1  mrg 				     CPP_BUF_COL (buffer),
1.1  mrg 				     "\"/*\" within comment");
1.1  mrg 	    }
1.1  mrg 	}
1.1  mrg       else if (c == '\n')
1.1  mrg 	{
1.1  mrg 	  unsigned int cols;
1.1  mrg 	  buffer->cur = cur - 1;
1.1  mrg 	  if (warn_bidi_p)
1.1  mrg 	    maybe_warn_bidi_on_close (pfile, cur);
1.1  mrg 	  _cpp_process_line_notes (pfile, true);
1.1  mrg 	  if (buffer->next_line >= buffer->rlimit)
1.1  mrg 	    return true;
1.1  mrg 	  _cpp_clean_line (pfile);
1.1  mrg
1.1  mrg 	  cols = buffer->next_line - buffer->line_base;
1.1  mrg 	  CPP_INCREMENT_LINE (pfile, cols);
1.1  mrg
1.1  mrg 	  cur = buffer->cur;
1.1  mrg 	}
1.1  mrg       /* If this is a beginning of a UTF-8 encoding, it might be
1.1  mrg 	 a bidirectional control character.  The sequence starts at
1.1  mrg 	 cur - 1 because CUR has already moved past C.  */
1.1  mrg       else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
1.1  mrg 	{
1.1  mrg 	  location_t loc;
1.1  mrg 	  bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
1.1  mrg 	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
1.1  mrg 	}
1.1  mrg     }
1.1  mrg
1.1  mrg   buffer->cur = cur;
1.1  mrg   _cpp_process_line_notes (pfile, true);
1.1  mrg   return false;
1.1  mrg }
1.1  mrg
1.1  mrg /* Skip a C++ line comment, leaving buffer->cur pointing to the
1.1  mrg    terminating newline.  Handles escaped newlines.  Returns nonzero
1.1  mrg    if a multiline comment, that is, if processing the line notes
1.1  mrg    changed pfile->line_table->highest_line.
1.1  mrg
1.1  mrg    When bidi warnings are enabled, every 0xE2 byte in the comment is
1.1  mrg    checked for a UTF-8 bidi control character, and contexts still open
1.1  mrg    at the newline are reported and closed.  If the comment contains no
1.1  mrg    0xE2 byte, no bidi processing is done at all.  */
1.1  mrg static int
1.1  mrg skip_line_comment (cpp_reader *pfile)
1.1  mrg {
1.1  mrg   cpp_buffer *buffer = pfile->buffer;
1.1  mrg   location_t orig_line = pfile->line_table->highest_line;
1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
1.1  mrg
1.1  mrg   if (!warn_bidi_p)
1.1  mrg     while (*buffer->cur != '\n')
1.1  mrg       buffer->cur++;
1.1  mrg   else
1.1  mrg     {
1.1  mrg       while (*buffer->cur != '\n'
1.1  mrg 	     && *buffer->cur != bidi::utf8_start)
1.1  mrg 	buffer->cur++;
1.1  mrg       if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
1.1  mrg 	{
1.1  mrg 	  while (*buffer->cur != '\n')
1.1  mrg 	    {
1.1  mrg 	      if (__builtin_expect (*buffer->cur == bidi::utf8_start, 0))
1.1  mrg 		{
1.1  mrg 		  location_t loc;
1.1  mrg 		  bidi::kind kind = get_bidi_utf8 (pfile, buffer->cur, &loc);
1.1  mrg 		  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
1.1  mrg 		}
1.1  mrg 	      buffer->cur++;
1.1  mrg 	    }
1.1  mrg 	  maybe_warn_bidi_on_close (pfile, buffer->cur);
1.1  mrg 	}
1.1  mrg     }
1.1  mrg
1.1  mrg   _cpp_process_line_notes (pfile, true);
1.1  mrg   return orig_line != pfile->line_table->highest_line;
1.1  mrg }
1.1  mrg
1.1  mrg /* Skips a run of non-vertical whitespace that begins with C, leaving
1.1  mrg    buffer->cur at the first character that is not part of the run.
1.1  mrg    Inside a directive, and when pedantic, form feeds and vertical
1.1  mrg    tabs are diagnosed; NUL characters yield one warning per run.  */
1.1  mrg static void
1.1  mrg skip_whitespace (cpp_reader *pfile, cppchar_t c)
1.1  mrg {
1.1  mrg   cpp_buffer *buf = pfile->buffer;
1.1  mrg   bool nul_seen = false;
1.1  mrg
1.1  mrg   for (;;)
1.1  mrg     {
1.1  mrg       if (c == '\0')
1.1  mrg 	nul_seen = true;
1.1  mrg       /* Horizontal space is always OK; what remains is \f or \v.  */
1.1  mrg       else if (c != ' ' && c != '\t'
1.1  mrg 	       && pfile->state.in_directive && CPP_PEDANTIC (pfile))
1.1  mrg 	cpp_error_with_line (pfile, CPP_DL_PEDWARN,
1.1  mrg 			     pfile->line_table->highest_line,
1.1  mrg 			     CPP_BUF_COL (buf),
1.1  mrg 			     "%s in preprocessing directive",
1.1  mrg 			     c == '\f' ? "form feed" : "vertical tab");
1.1  mrg
1.1  mrg       c = *buf->cur++;
1.1  mrg
1.1  mrg       /* Carry on only for non-vertical space, i.e. ' ' \t \f \v \0.  */
1.1  mrg       if (!is_nvspace (c))
1.1  mrg 	break;
1.1  mrg     }
1.1  mrg
1.1  mrg   if (nul_seen)
1.1  mrg     {
1.1  mrg       encoding_rich_location rich_loc (pfile);
1.1  mrg       cpp_error_at (pfile, CPP_DL_WARNING, &rich_loc,
1.1  mrg 		    "null character(s) ignored");
1.1  mrg     }
1.1  mrg
1.1  mrg   /* The loop consumed one character beyond the run; give it back.  */
1.1  mrg   buf->cur--;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return 1 if every character of the number token STRING is valid in
1.1  mrg    a name (so it contains no '.', '+' or '-'), and 0 otherwise.  */
1.1  mrg static int
1.1  mrg name_p (cpp_reader *pfile, const cpp_string *string)
1.1  mrg {
1.1  mrg   unsigned int idx = 0;
1.1  mrg
1.1  mrg   /* Stop at the first character that cannot appear in an identifier.  */
1.1  mrg   while (idx < string->len && is_idchar (string->text[idx]))
1.1  mrg     idx++;
1.1  mrg
1.1  mrg   return idx == string->len;
1.1  mrg }
1.1  mrg
1.1  mrg /* After parsing an identifier or other sequence, warn if the
1.1  mrg    normalization state S shows TOKEN is not in NFC/NFKC as required
1.1  mrg    by the warn_normalize option.  Nothing is reported while skipping.  */
1.1  mrg static void
1.1  mrg warn_about_normalization (cpp_reader *pfile,
1.1  mrg 			  const cpp_token *token,
1.1  mrg 			  const struct normalize_state *s)
1.1  mrg {
1.1  mrg   if (!(CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s))
1.1  mrg       || pfile->state.skipping)
1.1  mrg     return;
1.1  mrg
1.1  mrg   location_t where = token->src_loc;
1.1  mrg
1.1  mrg   /* If possible, create a location range for the token.  */
1.1  mrg   if (where >= RESERVED_LOCATION_COUNT && token->type != CPP_EOF)
1.1  mrg     {
1.1  mrg       cpp_buffer *buf = pfile->buffer;
1.1  mrg
1.1  mrg       /* There must be no line notes to process.  */
1.1  mrg       if (buf->cur < buf->notes[buf->cur_note].pos
1.1  mrg 	  || pfile->overlaid_buffer)
1.1  mrg 	{
1.1  mrg 	  source_range span;
1.1  mrg 	  span.m_start = where;
1.1  mrg 	  span.m_finish
1.1  mrg 	    = linemap_position_for_column (pfile->line_table,
1.1  mrg 					   CPP_BUF_COLUMN (buf, buf->cur));
1.1  mrg 	  where = COMBINE_LOCATION_DATA (pfile->line_table,
1.1  mrg 					 where, span, NULL);
1.1  mrg 	}
1.1  mrg     }
1.1  mrg
1.1  mrg   encoding_rich_location rich_loc (pfile, where);
1.1  mrg
1.1  mrg   /* Spell the token into a scratch buffer so that it is printed
1.1  mrg      using UCNs, even if we'd otherwise happily print UTF-8.  */
1.1  mrg   unsigned char *spelling = XNEWVEC (unsigned char, cpp_token_len (token));
1.1  mrg   const size_t spelling_len
1.1  mrg     = cpp_spell_token (pfile, token, spelling, false) - spelling;
1.1  mrg
1.1  mrg   if (NORMALIZE_STATE_RESULT (s) == normalized_C)
1.1  mrg     cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
1.1  mrg 		    "`%.*s' is not in NFKC", (int) spelling_len, spelling);
1.1  mrg   else if (CPP_OPTION (pfile, cplusplus))
1.1  mrg     cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
1.1  mrg 		       "`%.*s' is not in NFC", (int) spelling_len, spelling);
1.1  mrg   else
1.1  mrg     cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
1.1  mrg 		    "`%.*s' is not in NFC", (int) spelling_len, spelling);
1.1  mrg
1.1  mrg   free (spelling);
1.1  mrg }
1.1  mrg
1.1  mrg static const cppchar_t utf8_signifier = 0xC0; /* Smallest byte value that
1.1  mrg    forms_identifier_p passes to _cpp_valid_utf8 as a possible UTF-8
1.1  mrg    lead byte.  */
1.1  mrg
1.1  mrg /* Returns TRUE if the sequence starting at buffer->cur is valid in
1.1  mrg    an identifier, in which case buffer->cur has been advanced past
1.1  mrg    it.  FIRST is TRUE if this starts an identifier.  STATE is handed
1.1  mrg    to the UTF-8/UCN validators.  Accepts '$' when dollars_in_ident is
1.1  mrg    set, and UTF-8 characters or UCNs when extended_identifiers is set.  */
1.1  mrg
1.1  mrg static bool
1.1  mrg forms_identifier_p (cpp_reader *pfile, int first,
1.1  mrg 		    struct normalize_state *state)
1.1  mrg {
1.1  mrg   cpp_buffer *buf = pfile->buffer;
1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
1.1  mrg
1.1  mrg   if (*buf->cur == '$')
1.1  mrg     {
1.1  mrg       if (!CPP_OPTION (pfile, dollars_in_ident))
1.1  mrg 	return false;
1.1  mrg
1.1  mrg       buf->cur++;
1.1  mrg       if (CPP_OPTION (pfile, warn_dollars) && !pfile->state.skipping)
1.1  mrg 	{
1.1  mrg 	  /* Clear the option first so the pedwarn is issued only once.  */
1.1  mrg 	  CPP_OPTION (pfile, warn_dollars) = 0;
1.1  mrg 	  cpp_error (pfile, CPP_DL_PEDWARN, "'$' in identifier or number");
1.1  mrg 	}
1.1  mrg
1.1  mrg       return true;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Everything below needs extended identifiers.  */
1.1  mrg   if (!CPP_OPTION (pfile, extended_identifiers))
1.1  mrg     return false;
1.1  mrg
1.1  mrg   cppchar_t value;
1.1  mrg
1.1  mrg   /* Is this a valid UTF-8 char?  */
1.1  mrg   if (*buf->cur >= utf8_signifier)
1.1  mrg     {
1.1  mrg       if (__builtin_expect (*buf->cur == bidi::utf8_start, 0)
1.1  mrg 	  && warn_bidi_p)
1.1  mrg 	{
1.1  mrg 	  location_t loc;
1.1  mrg 	  bidi::kind kind = get_bidi_utf8 (pfile, buf->cur, &loc);
1.1  mrg 	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
1.1  mrg 	}
1.1  mrg
1.1  mrg       if (_cpp_valid_utf8 (pfile, &buf->cur, buf->rlimit, 1 + !first,
1.1  mrg 			   state, &value))
1.1  mrg 	return true;
1.1  mrg
1.1  mrg       return false;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Is this a syntactically valid UCN?  */
1.1  mrg   if (*buf->cur == '\\'
1.1  mrg       && (buf->cur[1] == 'u' || buf->cur[1] == 'U'))
1.1  mrg     {
1.1  mrg       buf->cur += 2;
1.1  mrg       if (warn_bidi_p)
1.1  mrg 	{
1.1  mrg 	  location_t loc;
1.1  mrg 	  bidi::kind kind = get_bidi_ucn (pfile,
1.1  mrg 					  buf->cur,
1.1  mrg 					  buf->cur[-1] == 'U',
1.1  mrg 					  &loc);
1.1  mrg 	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
1.1  mrg 	}
1.1  mrg
1.1  mrg       if (_cpp_valid_ucn (pfile, &buf->cur, buf->rlimit, 1 + !first,
1.1  mrg 			  state, &value, NULL, NULL))
1.1  mrg 	return true;
1.1  mrg
1.1  mrg       /* Not accepted: step back over the backslash and 'u'/'U'.  */
1.1  mrg       buf->cur -= 2;
1.1  mrg     }
1.1  mrg
1.1  mrg   return false;
1.1  mrg }
1.1  mrg
1.1  mrg /* Helper function to issue a pedwarn about improper __VA_OPT__ use:
1.1  mrg    either the feature is unavailable (pedantic mode without the
1.1  mrg    va_opt option, outside system headers), or it is used where
1.1  mrg    variadic arguments are not OK.  */
1.1  mrg static void
1.1  mrg maybe_va_opt_error (cpp_reader *pfile)
1.1  mrg {
1.1  mrg   if (CPP_PEDANTIC (pfile) && !CPP_OPTION (pfile, va_opt))
1.1  mrg     {
1.1  mrg       /* __VA_OPT__ should not be accepted at all, but allow it in
1.1  mrg 	 system headers.  */
1.1  mrg       if (_cpp_in_system_header (pfile))
1.1  mrg 	return;
1.1  mrg
1.1  mrg       cpp_error (pfile, CPP_DL_PEDWARN,
1.1  mrg 		 "__VA_OPT__ is not available until C++20");
1.1  mrg       return;
1.1  mrg     }
1.1  mrg
1.1  mrg   if (pfile->state.va_args_ok)
1.1  mrg     return;
1.1  mrg
1.1  mrg   /* __VA_OPT__ should only appear in the replacement list of a
1.1  mrg      variadic macro.  */
1.1  mrg   cpp_error (pfile, CPP_DL_PEDWARN,
1.1  mrg 	     "__VA_OPT__ can only appear in the expansion"
1.1  mrg 	     " of a C++20 variadic macro");
1.1  mrg }
1.1  mrg
1.1  mrg /* Helper function to get the cpp_hashnode of the identifier BASE.
1.1  mrg    The identifier runs from BASE up to the first character that is
1.1  mrg    not ISIDNUM.  The node is looked up with HT_ALLOC, and any
1.1  mrg    diagnostics flagged on it are issued unless we are skipping.  */
1.1  mrg static cpp_hashnode *
1.1  mrg lex_identifier_intern (cpp_reader *pfile, const uchar *base)
1.1  mrg {
1.1  mrg   const uchar *end = base + 1;
1.1  mrg   unsigned int hash = HT_HASHSTEP (0, *base);
1.1  mrg
1.1  mrg   for (; ISIDNUM (*end); end++)
1.1  mrg     {
1.1  mrg       hash = HT_HASHSTEP (hash, *end);
1.1  mrg     }
1.1  mrg
1.1  mrg   const unsigned int len = end - base;
1.1  mrg   hash = HT_HASHFINISH (hash, len);
1.1  mrg
1.1  mrg   cpp_hashnode *node
1.1  mrg     = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1.1  mrg 					 base, len, hash, HT_ALLOC));
1.1  mrg
1.1  mrg   /* Rarely, identifiers require diagnostics when lexed; the usual
1.1  mrg      case is to return straight away.  */
1.1  mrg   if (__builtin_expect (!(node->flags & NODE_DIAGNOSTIC)
1.1  mrg 			|| pfile->state.skipping, 1))
1.1  mrg     return node;
1.1  mrg
1.1  mrg   /* It is allowed to poison the same identifier twice.  */
1.1  mrg   if ((node->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1.1  mrg     cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1.1  mrg 	       NODE_NAME (node));
1.1  mrg
1.1  mrg   /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1.1  mrg      replacement list of a variadic macro.  */
1.1  mrg   if (node == pfile->spec_nodes.n__VA_ARGS__
1.1  mrg       && !pfile->state.va_args_ok)
1.1  mrg     {
1.1  mrg       if (CPP_OPTION (pfile, cplusplus))
1.1  mrg 	cpp_error (pfile, CPP_DL_PEDWARN,
1.1  mrg 		   "__VA_ARGS__ can only appear in the expansion"
1.1  mrg 		   " of a C++11 variadic macro");
1.1  mrg       else
1.1  mrg 	cpp_error (pfile, CPP_DL_PEDWARN,
1.1  mrg 		   "__VA_ARGS__ can only appear in the expansion"
1.1  mrg 		   " of a C99 variadic macro");
1.1  mrg     }
1.1  mrg
1.1  mrg   if (node == pfile->spec_nodes.n__VA_OPT__)
1.1  mrg     maybe_va_opt_error (pfile);
1.1  mrg
1.1  mrg   /* For -Wc++-compat, warn about use of C++ named operators.  */
1.1  mrg   if (node->flags & NODE_WARN_OPERATOR)
1.1  mrg     cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1.1  mrg 		 "identifier \"%s\" is a special operator name in C++",
1.1  mrg 		 NODE_NAME (node));
1.1  mrg
1.1  mrg   return node;
1.1  mrg }
1.1  mrg
1.1  mrg /* Get the cpp_hashnode of an identifier specified by NAME in
1.1  mrg    the current cpp_reader object.  This simply forwards to
1.1  mrg    lex_identifier_intern.  NOTE(review): that helper looks the name up
1.1  mrg    with HT_ALLOC and dereferences the result unconditionally, so a
1.1  mrg    NULL return is presumably not expected here -- confirm.  */
1.1  mrg cpp_hashnode *
1.1  mrg _cpp_lex_identifier (cpp_reader *pfile, const char *name)
1.1  mrg {
1.1  mrg   return lex_identifier_intern (pfile, (const uchar *) name);
1.1  mrg }
1.1  mrg
1.1  mrg /* Lex an identifier starting at BUFFER->CUR - 1.  BASE is the address
1.1  mrg    of its first character.  When STARTS_UCN is false the plain
1.1  mrg    identifier characters are scanned and hashed first; if STARTS_UCN
1.1  mrg    is true, or forms_identifier_p accepts what follows that scan, the
1.1  mrg    slower path is taken instead.  NST is updated as characters are
1.1  mrg    consumed.  *SPELLING is set to the node for the identifier as
1.1  mrg    spelled in the source; on the fast path that is the returned node
1.1  mrg    itself.  Diagnostics flagged on the resulting node are issued
1.1  mrg    unless we are skipping.  */
1.1  mrg static cpp_hashnode *
1.1  mrg lex_identifier (cpp_reader *pfile, const uchar *base, bool starts_ucn,
1.1  mrg 		struct normalize_state *nst, cpp_hashnode **spelling)
1.1  mrg {
1.1  mrg   cpp_hashnode *result;
1.1  mrg   const uchar *cur;
1.1  mrg   unsigned int len;
1.1  mrg   unsigned int hash = HT_HASHSTEP (0, *base);
1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
1.1  mrg
1.1  mrg   cur = pfile->buffer->cur;
1.1  mrg   if (! starts_ucn)
1.1  mrg     {
1.1  mrg       while (ISIDNUM (*cur))
1.1  mrg 	{
1.1  mrg 	  hash = HT_HASHSTEP (hash, *cur);
1.1  mrg 	  cur++;
1.1  mrg 	}
1.1  mrg       NORMALIZE_STATE_UPDATE_IDNUM (nst, *(cur - 1)); /* Last plain char only.  */
1.1  mrg     }
1.1  mrg   pfile->buffer->cur = cur;
1.1  mrg   if (starts_ucn || forms_identifier_p (pfile, false, nst))
1.1  mrg     {
1.1  mrg       /* Slower version for identifiers containing UCNs
1.1  mrg 	 or extended chars (including $).  The hash computed above is
1.1  mrg 	 not used on this path; the node comes from
1.1  mrg 	 _cpp_interpret_identifier and the spelling from cpp_lookup.  */
1.1  mrg       do {
1.1  mrg 	while (ISIDNUM (*pfile->buffer->cur))
1.1  mrg 	  {
1.1  mrg 	    NORMALIZE_STATE_UPDATE_IDNUM (nst, *pfile->buffer->cur);
1.1  mrg 	    pfile->buffer->cur++;
1.1  mrg 	  }
1.1  mrg       } while (forms_identifier_p (pfile, false, nst));
1.1  mrg       if (warn_bidi_p)
1.1  mrg 	maybe_warn_bidi_on_close (pfile, pfile->buffer->cur);
1.1  mrg       result = _cpp_interpret_identifier (pfile, base,
1.1  mrg 					  pfile->buffer->cur - base);
1.1  mrg       *spelling = cpp_lookup (pfile, base, pfile->buffer->cur - base);
1.1  mrg     }
1.1  mrg   else
1.1  mrg     {
1.1  mrg       len = cur - base;
1.1  mrg       hash = HT_HASHFINISH (hash, len);
1.1  mrg
1.1  mrg       result = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1.1  mrg 						  base, len, hash, HT_ALLOC));
1.1  mrg       *spelling = result;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Rarely, identifiers require diagnostics when lexed.  */
1.1  mrg   if (__builtin_expect ((result->flags & NODE_DIAGNOSTIC)
1.1  mrg 			&& !pfile->state.skipping, 0))
1.1  mrg     {
1.1  mrg       /* It is allowed to poison the same identifier twice.  */
1.1  mrg       if ((result->flags & NODE_POISONED) && !pfile->state.poisoned_ok)
1.1  mrg 	cpp_error (pfile, CPP_DL_ERROR, "attempt to use poisoned \"%s\"",
1.1  mrg 		   NODE_NAME (result));
1.1  mrg
1.1  mrg       /* Constraint 6.10.3.5: __VA_ARGS__ should only appear in the
1.1  mrg 	 replacement list of a variadic macro.  */
1.1  mrg       if (result == pfile->spec_nodes.n__VA_ARGS__
1.1  mrg 	  && !pfile->state.va_args_ok)
1.1  mrg 	{
1.1  mrg 	  if (CPP_OPTION (pfile, cplusplus))
1.1  mrg 	    cpp_error (pfile, CPP_DL_PEDWARN,
1.1  mrg 		       "__VA_ARGS__ can only appear in the expansion"
1.1  mrg 		       " of a C++11 variadic macro");
1.1  mrg 	  else
1.1  mrg 	    cpp_error (pfile, CPP_DL_PEDWARN,
1.1  mrg 		       "__VA_ARGS__ can only appear in the expansion"
1.1  mrg 		       " of a C99 variadic macro");
1.1  mrg 	}
1.1  mrg
1.1  mrg       /* __VA_OPT__ should only appear in the replacement list of a
1.1  mrg 	 variadic macro.  */
1.1  mrg       if (result == pfile->spec_nodes.n__VA_OPT__)
1.1  mrg 	maybe_va_opt_error (pfile);
1.1  mrg
1.1  mrg       /* For -Wc++-compat, warn about use of C++ named operators.  */
1.1  mrg       if (result->flags & NODE_WARN_OPERATOR)
1.1  mrg 	cpp_warning (pfile, CPP_W_CXX_OPERATOR_NAMES,
1.1  mrg 		     "identifier \"%s\" is a special operator name in C++",
1.1  mrg 		     NODE_NAME (result));
1.1  mrg     }
1.1  mrg
1.1  mrg   return result;
1.1  mrg }
1.1  mrg
1.1  mrg /* Lex a number to NUMBER starting at BUFFER->CUR - 1.  NST is updated
1.1  mrg    for every character consumed.  On return NUMBER->text is a
1.1  mrg    NUL-terminated copy of the spelling, held in storage obtained from
1.1  mrg    _cpp_unaligned_alloc, and NUMBER->len is its length excluding that
1.1  mrg    NUL.  */
1.1  mrg static void
1.1  mrg lex_number (cpp_reader *pfile, cpp_string *number,
1.1  mrg 	    struct normalize_state *nst)
1.1  mrg {
1.1  mrg   const uchar *cur;
1.1  mrg   const uchar *base;
1.1  mrg   uchar *dest;
1.1  mrg
1.1  mrg   base = pfile->buffer->cur - 1;
1.1  mrg   do
1.1  mrg     {
1.1  mrg       const uchar *adj_digit_sep = NULL; /* First separator that follows another.  */
1.1  mrg       cur = pfile->buffer->cur;
1.1  mrg
1.1  mrg       /* N.B. ISIDNUM does not include $.  A '.' or a sign is not
1.1  mrg 	 accepted when a digit separator sits just before it (for a
1.1  mrg 	 sign, just before the character preceding it).  */
1.1  mrg       while (ISIDNUM (*cur)
1.1  mrg 	     || (*cur == '.' && !DIGIT_SEP (cur[-1]))
1.1  mrg 	     || DIGIT_SEP (*cur)
1.1  mrg 	     || (VALID_SIGN (*cur, cur[-1]) && !DIGIT_SEP (cur[-2])))
1.1  mrg 	{
1.1  mrg 	  NORMALIZE_STATE_UPDATE_IDNUM (nst, *cur);
1.1  mrg 	  /* Adjacent digit separators do not form part of the pp-number syntax.
1.1  mrg 	     However, they can safely be diagnosed here as an error, since '' is
1.1  mrg 	     not a valid preprocessing token.  */
1.1  mrg 	  if (DIGIT_SEP (*cur) && DIGIT_SEP (cur[-1]) && !adj_digit_sep)
1.1  mrg 	    adj_digit_sep = cur;
1.1  mrg 	  cur++;
1.1  mrg 	}
1.1  mrg       /* A number can't end with a digit separator.  */
1.1  mrg       while (cur > pfile->buffer->cur && DIGIT_SEP (cur[-1]))
1.1  mrg 	--cur;
1.1  mrg       if (adj_digit_sep && adj_digit_sep < cur) /* Still inside the number.  */
1.1  mrg 	cpp_error (pfile, CPP_DL_ERROR, "adjacent digit separators");
1.1  mrg
1.1  mrg       pfile->buffer->cur = cur;
1.1  mrg     }
1.1  mrg   while (forms_identifier_p (pfile, false, nst));
1.1  mrg
1.1  mrg   number->len = cur - base;
1.1  mrg   dest = _cpp_unaligned_alloc (pfile, number->len + 1);
1.1  mrg   memcpy (dest, base, number->len);
1.1  mrg   dest[number->len] = '\0';
1.1  mrg   number->text = dest;
1.1  mrg }
1.1  mrg
1.1  mrg /* Turn TOKEN into a token of type TYPE whose literal spelling is a
1.1  mrg    copy of the LEN bytes at BASE, made by cpp_alloc_token_string.  */
1.1  mrg static void
1.1  mrg create_literal (cpp_reader *pfile, cpp_token *token, const uchar *base,
1.1  mrg 		unsigned int len, enum cpp_ttype type)
1.1  mrg {
1.1  mrg   token->val.str.text = cpp_alloc_token_string (pfile, base, len);
1.1  mrg   token->val.str.len = len;
1.1  mrg   token->type = type;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return a NUL-terminated copy of the LEN bytes at PTR, placed in
1.1  mrg    storage obtained from _cpp_unaligned_alloc.  */
1.1  mrg const uchar *
1.1  mrg cpp_alloc_token_string (cpp_reader *pfile,
1.1  mrg 			const unsigned char *ptr, unsigned len)
1.1  mrg {
1.1  mrg   uchar *copy = _cpp_unaligned_alloc (pfile, len + 1);
1.1  mrg
1.1  mrg   memcpy (copy, ptr, len);
1.1  mrg   copy[len] = '\0';
1.1  mrg   return copy;
1.1  mrg }
1.1  mrg
1.1  mrg /* Accumulates the spelling of a literal in a chain of _cpp_buff
1.1  mrg    buffers.  FIRST is the head of the chain and LAST is the buffer
1.1  mrg    currently being filled.  Used for string literal lexing.  */
1.1  mrg struct lit_accum {
1.1  mrg   _cpp_buff *first;
1.1  mrg   _cpp_buff *last;
1.1  mrg   const uchar *rpos;	/* Next byte for read_char; NULL when not reading.  */
1.1  mrg   size_t accum;		/* Number of bytes appended so far (see append).  */
1.1  mrg
1.1  mrg   lit_accum ()
1.1  mrg     : first (NULL), last (NULL), rpos (0), accum (0)
1.1  mrg   {
1.1  mrg   }
1.1  mrg
1.1  mrg   void append (cpp_reader *, const uchar *, size_t);
1.1  mrg
1.1  mrg   void read_begin (cpp_reader *);
1.1  mrg   bool reading_p () const /* True while bytes remain to be read back.  */
1.1  mrg   {
1.1  mrg     return rpos != NULL;
1.1  mrg   }
1.1  mrg   char read_char () /* Return the byte at RPOS and step past it.  */
1.1  mrg   {
1.1  mrg     char c = *rpos++;
1.1  mrg     if (rpos == BUFF_FRONT (last)) /* Caught up with the appended data.  */
1.1  mrg       rpos = NULL;
1.1  mrg     return c;
1.1  mrg   }
1.1  mrg };
1.1  mrg
1.1  mrg /* Subroutine of lex_raw_string: Append LEN chars from BASE to the
1.1  mrg    buffer chain running from FIRST to LAST, starting the chain on the
1.1  mrg    first call and growing it when LAST is too small.  ACCUM is
1.1  mrg    increased by the number of chars stored.  */
1.1  mrg
1.1  mrg void
1.1  mrg lit_accum::append (cpp_reader *pfile, const uchar *base, size_t len)
1.1  mrg {
1.1  mrg   if (last == NULL)
1.1  mrg     /* Starting.  */
1.1  mrg     first = last = _cpp_get_buff (pfile, len);
1.1  mrg   else
1.1  mrg     {
1.1  mrg       const size_t room = BUFF_ROOM (last);
1.1  mrg
1.1  mrg       if (room < len)
1.1  mrg 	{
1.1  mrg 	  /* There is insufficient room in the buffer.  Copy what we
1.1  mrg 	     can, and then either extend or create a new one for the
1.1  mrg 	     remainder.  */
1.1  mrg 	  memcpy (BUFF_FRONT (last), base, room);
1.1  mrg 	  BUFF_FRONT (last) += room;
1.1  mrg 	  accum += room;
1.1  mrg 	  base += room;
1.1  mrg 	  len -= room;
1.1  mrg
1.1  mrg 	  gcc_checking_assert (!rpos);
1.1  mrg
1.1  mrg 	  last = _cpp_append_extend_buff (pfile, last, len);
1.1  mrg 	}
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Whatever is left now fits in LAST.  */
1.1  mrg   memcpy (BUFF_FRONT (last), base, len);
1.1  mrg   BUFF_FRONT (last) += len;
1.1  mrg   accum += len;
1.1  mrg }
1.1  mrg
1.1  mrg /* Enter read mode: make RPOS point at the current front of LAST,
1.1  mrg    first making sure LAST has room for the chars about to be
1.1  mrg    appended and read back.  */
1.1  mrg void
1.1  mrg lit_accum::read_begin (cpp_reader *pfile)
1.1  mrg {
1.1  mrg   /* We never accumulate more than 4 chars to read.  */
1.1  mrg   enum { max_pending = 4 };
1.1  mrg
1.1  mrg   if (BUFF_ROOM (last) < max_pending)
1.1  mrg     last = _cpp_append_extend_buff (pfile, last, max_pending);
1.1  mrg
1.1  mrg   rpos = BUFF_FRONT (last);
1.1  mrg }
1.1  mrg
1.1  mrg /* Returns true if the identifier starting at BASE is defined as a
1.1  mrg    macro.  The hash table is only consulted (HT_NO_INSERT), never
1.1  mrg    added to.  This might not work if compiling with -save-temps, or
1.1  mrg    preprocessing separately from compilation.  */
1.1  mrg
1.1  mrg static bool
1.1  mrg is_macro(cpp_reader *pfile, const uchar *base)
1.1  mrg {
1.1  mrg   /* Not even the start of an identifier.  */
1.1  mrg   if (! ISIDST (*base))
1.1  mrg     return false;
1.1  mrg
1.1  mrg   const uchar *end = base + 1;
1.1  mrg   unsigned int hash = HT_HASHSTEP (0, *base);
1.1  mrg
1.1  mrg   for (; ISIDNUM (*end); ++end)
1.1  mrg     {
1.1  mrg       hash = HT_HASHSTEP (hash, *end);
1.1  mrg     }
1.1  mrg   hash = HT_HASHFINISH (hash, end - base);
1.1  mrg
1.1  mrg   cpp_hashnode *node
1.1  mrg     = CPP_HASHNODE (ht_lookup_with_hash (pfile->hash_table,
1.1  mrg 					 base, end - base, hash,
1.1  mrg 					 HT_NO_INSERT));
1.1  mrg
1.1  mrg   if (!node)
1.1  mrg     return false;
1.1  mrg
1.1  mrg   return cpp_macro_p (node);
1.1  mrg }
1.1  mrg
1.1  mrg /* Returns true if the literal suffix at BASE does not have the
1.1  mrg    expected user-defined-literal form and is defined as a macro.  */
1.1  mrg
1.1  mrg static bool
1.1  mrg is_macro_not_literal_suffix(cpp_reader *pfile, const uchar *base)
1.1  mrg {
1.1  mrg   /* User-defined literals outside of namespace std must start with a
1.1  mrg      single underscore, so anything of that form is assumed to really
1.1  mrg      be a UDL suffix.  UDLs defined inside namespace std need no
1.1  mrg      special care: their names are reserved, so cannot be used as
1.1  mrg      macro names in valid programs.  */
1.1  mrg   const bool single_underscore = base[0] == '_' && base[1] != '_';
1.1  mrg
1.1  mrg   return !single_underscore && is_macro (pfile, base);
1.1  mrg }
1.1  mrg
1.1  mrg /* Lexes a raw string.  The stored string contains the spelling,
1.1  mrg    including double quotes, delimiter string, '(' and ')', any leading
1.1  mrg    'L', 'u', 'U' or 'u8' and 'R' modifier.  The created token contains
1.1  mrg    the type of the literal, or CPP_OTHER if it was not properly
1.1  mrg    terminated.
1.1  mrg
1.1  mrg    BASE is the start of the token.  Updates pfile->buffer->cur to just
1.1  mrg    after the lexed string.
1.1  mrg
1.1  mrg    If the input ends before the string is closed, TOKEN is turned into
1.1  mrg    a CPP_EOF token, an error is issued and the buffer is popped.
1.1  mrg
1.1  mrg    The spelling is NUL-terminated, but it is not guaranteed that this
1.1  mrg    is the first NUL since embedded NULs are preserved.  */
1.1  mrg
1.1  mrg static void
1.1  mrg lex_raw_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1.1  mrg {
1.1  mrg   const uchar *pos = base;
1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
1.1  mrg
1.1  mrg   /* 'tis a pity this information isn't passed down from the lexer's
1.1  mrg      initial categorization of the token.  */
1.1  mrg   enum cpp_ttype type = CPP_STRING;
1.1  mrg
1.1  mrg   if (*pos == 'L')
1.1  mrg     {
1.1  mrg       type = CPP_WSTRING;
1.1  mrg       pos++;
1.1  mrg     }
1.1  mrg   else if (*pos == 'U')
1.1  mrg     {
1.1  mrg       type = CPP_STRING32;
1.1  mrg       pos++;
1.1  mrg     }
1.1  mrg   else if (*pos == 'u')
1.1  mrg     {
1.1  mrg       if (pos[1] == '8')
1.1  mrg 	{
1.1  mrg 	  type = CPP_UTF8STRING;
1.1  mrg 	  pos++;
1.1  mrg 	}
1.1  mrg       else
1.1  mrg 	type = CPP_STRING16;
1.1  mrg       pos++;
1.1  mrg     }
1.1  mrg
1.1  mrg   gcc_checking_assert (pos[0] == 'R' && pos[1] == '"');
1.1  mrg   pos += 2;
1.1  mrg
1.1  mrg   _cpp_line_note *note = &pfile->buffer->notes[pfile->buffer->cur_note];
1.1  mrg
1.1  mrg   /* Skip notes before the ".  */
1.1  mrg   while (note->pos < pos)
1.1  mrg     ++note;
1.1  mrg
1.1  mrg   lit_accum accum;
1.1  mrg
1.1  mrg   uchar prefix[17]; /* Up to 16 delimiter chars plus the closing double quote.  */
1.1  mrg   unsigned prefix_len = 0;
1.1  mrg   enum Phase
1.1  mrg   {
1.1  mrg    PHASE_PREFIX = -2, /* Still collecting the delimiter.  */
1.1  mrg    PHASE_NONE = -1, /* Not collecting or matching the delimiter.  */
1.1  mrg    PHASE_SUFFIX = 0 /* Values from here up index PREFIX while matching.  */
1.1  mrg   } phase = PHASE_PREFIX;
1.1  mrg
1.1  mrg   for (;;)
1.1  mrg     {
1.1  mrg       gcc_checking_assert (note->pos >= pos);
1.1  mrg
1.1  mrg       /* Undo any escaped newlines and trigraphs.  */
1.1  mrg       if (!accum.reading_p () && note->pos == pos)
1.1  mrg 	switch (note->type)
1.1  mrg 	  {
1.1  mrg 	  case '\\':
1.1  mrg 	  case ' ':
1.1  mrg 	    /* Restore backslash followed by newline.  The text lexed so
1.1  mrg 	       far is flushed to ACCUM first, and the restored chars are
1.1  mrg 	       then read back through ACCUM.  */
1.1  mrg 	    accum.append (pfile, base, pos - base);
1.1  mrg 	    base = pos;
1.1  mrg 	    accum.read_begin (pfile);
1.1  mrg 	    accum.append (pfile, UC"\\", 1);
1.1  mrg
1.1  mrg 	  after_backslash:
1.1  mrg 	    if (note->type == ' ')
1.1  mrg 	      /* GNU backslash whitespace newline extension.  FIXME
1.1  mrg 		 could be any sequence of non-vertical space.  When we
1.1  mrg 		 can properly restore any such sequence, we should
1.1  mrg 		 mark this note as handled so _cpp_process_line_notes
1.1  mrg 		 doesn't warn.  */
1.1  mrg 	      accum.append (pfile, UC" ", 1);
1.1  mrg
1.1  mrg 	    accum.append (pfile, UC"\n", 1);
1.1  mrg 	    note++;
1.1  mrg 	    break;
1.1  mrg
1.1  mrg 	  case '\n':
1.1  mrg 	    /* This can happen for ??/<NEWLINE> when trigraphs are not
1.1  mrg 	       being interpreted.  */
1.1  mrg 	    gcc_checking_assert (!CPP_OPTION (pfile, trigraphs));
1.1  mrg 	    note->type = 0;
1.1  mrg 	    note++;
1.1  mrg 	    break;
1.1  mrg
1.1  mrg 	  default:
1.1  mrg 	    gcc_checking_assert (_cpp_trigraph_map[note->type]);
1.1  mrg
1.1  mrg 	    /* Don't warn about this trigraph in
1.1  mrg 	       _cpp_process_line_notes, since trigraphs show up as
1.1  mrg 	       trigraphs in raw strings.  */
1.1  mrg 	    uchar type = note->type;
1.1  mrg 	    note->type = 0;
1.1  mrg
1.1  mrg 	    if (CPP_OPTION (pfile, trigraphs))
1.1  mrg 	      {
1.1  mrg 		accum.append (pfile, base, pos - base);
1.1  mrg 		base = pos;
1.1  mrg 		accum.read_begin (pfile);
1.1  mrg 		accum.append (pfile, UC"??", 2);
1.1  mrg 		accum.append (pfile, &type, 1);
1.1  mrg
1.1  mrg 		/* ??/ followed by newline gets two line notes, one for
1.1  mrg 		   the trigraph and one for the backslash/newline.  */
1.1  mrg 		if (type == '/' && note[1].pos == pos)
1.1  mrg 		  {
1.1  mrg 		    note++;
1.1  mrg 		    gcc_assert (note->type == '\\' || note->type == ' ');
1.1  mrg 		    goto after_backslash;
1.1  mrg 		  }
1.1  mrg 		/* Skip the replacement character.  */
1.1  mrg 		base = ++pos;
1.1  mrg 	      }
1.1  mrg
1.1  mrg 	    note++;
1.1  mrg 	    break;
1.1  mrg 	  }
1.1  mrg
1.1  mrg       /* Now get a char to process.  Either from an expanded note, or
1.1  mrg 	 from the line buffer.  */
1.1  mrg       bool read_note = accum.reading_p ();
1.1  mrg       char c = read_note ? accum.read_char () : *pos++;
1.1  mrg
1.1  mrg       if (phase == PHASE_PREFIX)
1.1  mrg 	{
1.1  mrg 	  if (c == '(')
1.1  mrg 	    {
1.1  mrg 	      /* Done.  The double quote is stored after the delimiter so
1.1  mrg 		 that PREFIX holds everything that must follow the
1.1  mrg 		 closing parenthesis.  */
1.1  mrg 	      phase = PHASE_NONE;
1.1  mrg 	      prefix[prefix_len++] = '"';
1.1  mrg 	    }
1.1  mrg 	  else if (prefix_len < 16
1.1  mrg 		   /* Prefix chars are any of the basic character set,
1.1  mrg 		      [lex.charset], except for space, parentheses,
1.1  mrg 		      backslash, and the tab, vertical tab, form feed
1.1  mrg 		      and new-line characters.  Optimized for a
1.1  mrg 		      contiguous alphabet.  */
1.1  mrg 		   /* Unlike a switch, this collapses down to one or
1.1  mrg 		      two shift and bitmask operations on an ASCII
1.1  mrg 		      system, with an outlier or two.   */
1.1  mrg 		   && (('Z' - 'A' == 25
1.1  mrg 			? ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
1.1  mrg 			: ISIDST (c))
1.1  mrg 		       || (c >= '0' && c <= '9')
1.1  mrg 		       || c == '_' || c == '{' || c == '}'
1.1  mrg 		       || c == '[' || c == ']' || c == '#'
1.1  mrg 		       || c == '<' || c == '>' || c == '%'
1.1  mrg 		       || c == ':' || c == ';' || c == '.' || c == '?'
1.1  mrg 		       || c == '*' || c == '+' || c == '-' || c == '/'
1.1  mrg 		       || c == '^' || c == '&' || c == '|' || c == '~'
1.1  mrg 		       || c == '!' || c == '=' || c == ','
1.1  mrg 		       || c == '"' || c == '\''))
1.1  mrg 	    prefix[prefix_len++] = c;
1.1  mrg 	  else
1.1  mrg 	    {
1.1  mrg 	      /* Something is wrong.  */
1.1  mrg 	      int col = CPP_BUF_COLUMN (pfile->buffer, pos) + read_note;
1.1  mrg 	      if (prefix_len == 16)
1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1.1  mrg 				     col, "raw string delimiter longer "
1.1  mrg 				     "than 16 characters");
1.1  mrg 	      else if (c == '\n')
1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1.1  mrg 				     col, "invalid new-line in raw "
1.1  mrg 				     "string delimiter");
1.1  mrg 	      else
1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc,
1.1  mrg 				     col, "invalid character '%c' in "
1.1  mrg 				     "raw string delimiter", c);
1.1  mrg 	      type = CPP_OTHER;
1.1  mrg 	      phase = PHASE_NONE;
1.1  mrg 	      /* Continue until we get a close quote, that's probably
1.1  mrg 		 the best failure mode.  */
1.1  mrg 	      prefix_len = 0;
1.1  mrg 	    }
1.1  mrg 	  if (c != '\n')
1.1  mrg 	    continue;
1.1  mrg 	}
1.1  mrg
1.1  mrg       if (phase != PHASE_NONE)
1.1  mrg 	{
1.1  mrg 	  if (prefix[phase] != c) /* Mismatch: back to the string body.  */
1.1  mrg 	    phase = PHASE_NONE;
1.1  mrg 	  else if (unsigned (phase + 1) == prefix_len) /* Whole terminator matched.  */
1.1  mrg 	    break;
1.1  mrg 	  else
1.1  mrg 	    {
1.1  mrg 	      phase = Phase (phase + 1);
1.1  mrg 	      continue;
1.1  mrg 	    }
1.1  mrg 	}
1.1  mrg
1.1  mrg       if (!prefix_len && c == '"')
1.1  mrg 	/* Failure mode lexing.  */
1.1  mrg 	goto out;
1.1  mrg       else if (prefix_len && c == ')')
1.1  mrg 	phase = PHASE_SUFFIX;
1.1  mrg       else if (!read_note && c == '\n')
1.1  mrg 	{
1.1  mrg 	  pos--;
1.1  mrg 	  pfile->buffer->cur = pos;
1.1  mrg 	  if (pfile->state.in_directive
1.1  mrg 	      || (pfile->state.parsing_args
1.1  mrg 		  && pfile->buffer->next_line >= pfile->buffer->rlimit))
1.1  mrg 	    {
1.1  mrg 	      cpp_error_with_line (pfile, CPP_DL_ERROR, token->src_loc, 0,
1.1  mrg 				   "unterminated raw string");
1.1  mrg 	      type = CPP_OTHER;
1.1  mrg 	      goto out;
1.1  mrg 	    }
1.1  mrg
1.1  mrg 	  accum.append (pfile, base, pos - base + 1); /* Includes the newline.  */
1.1  mrg 	  _cpp_process_line_notes (pfile, false);
1.1  mrg
1.1  mrg 	  if (pfile->buffer->next_line < pfile->buffer->rlimit)
1.1  mrg 	    CPP_INCREMENT_LINE (pfile, 0);
1.1  mrg 	  pfile->buffer->need_line = true;
1.1  mrg
1.1  mrg 	  if (!_cpp_get_fresh_line (pfile))
1.1  mrg 	    {
1.1  mrg 	      /* We ran out of file and failed to get a line.  */
1.1  mrg 	      location_t src_loc = token->src_loc;
1.1  mrg 	      token->type = CPP_EOF;
1.1  mrg 	      /* Tell the compiler the line number of the EOF token.  */
1.1  mrg 	      token->src_loc = pfile->line_table->highest_line;
1.1  mrg 	      token->flags = BOL;
1.1  mrg 	      if (accum.first)
1.1  mrg 		_cpp_release_buff (pfile, accum.first);
1.1  mrg 	      cpp_error_with_line (pfile, CPP_DL_ERROR, src_loc, 0,
1.1  mrg 				   "unterminated raw string");
1.1  mrg 	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
1.1  mrg 	      _cpp_pop_buffer (pfile);
1.1  mrg 	      return;
1.1  mrg 	    }
1.1  mrg
1.1  mrg 	  pos = base = pfile->buffer->cur;
1.1  mrg 	  note = &pfile->buffer->notes[pfile->buffer->cur_note];
1.1  mrg 	}
1.1  mrg       else if (__builtin_expect ((unsigned char) c == bidi::utf8_start, 0)
1.1  mrg 	       && warn_bidi_p)
1.1  mrg 	{
1.1  mrg 	  location_t loc;
1.1  mrg 	  bidi::kind kind = get_bidi_utf8 (pfile, pos - 1, &loc);
1.1  mrg 	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
1.1  mrg 	}
1.1  mrg     }
1.1  mrg
1.1  mrg   if (warn_bidi_p)
1.1  mrg     maybe_warn_bidi_on_close (pfile, pos);
1.1  mrg
1.1  mrg   if (CPP_OPTION (pfile, user_literals))
1.1  mrg     {
1.1  mrg       /* If a string format macro, say from inttypes.h, is placed touching
1.1  mrg 	 a string literal it could be parsed as a C++11 user-defined string
1.1  mrg 	 literal thus breaking the program.  */
1.1  mrg       if (is_macro_not_literal_suffix (pfile, pos))
1.1  mrg 	{
1.1  mrg 	  /* Raise a warning, but do not consume subsequent tokens.  */
1.1  mrg 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1.1  mrg 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1.1  mrg 				   token->src_loc, 0,
1.1  mrg 				   "invalid suffix on literal; C++11 requires "
1.1  mrg 				   "a space between literal and string macro");
1.1  mrg 	}
1.1  mrg       /* Grab user defined literal suffix.  */
1.1  mrg       else if (ISIDST (*pos))
1.1  mrg 	{
1.1  mrg 	  type = cpp_userdef_string_add_type (type);
1.1  mrg 	  ++pos;
1.1  mrg
1.1  mrg 	  while (ISIDNUM (*pos))
1.1  mrg 	    ++pos;
1.1  mrg 	}
1.1  mrg     }
1.1  mrg
1.1  mrg  out:
1.1  mrg   pfile->buffer->cur = pos;
1.1  mrg   if (!accum.accum) /* Nothing accumulated: the spelling is contiguous at BASE.  */
1.1  mrg     create_literal (pfile, token, base, pos - base, type);
1.1  mrg   else
1.1  mrg     {
1.1  mrg       size_t extra_len = pos - base; /* Tail not yet copied into ACCUM.  */
1.1  mrg       uchar *dest = _cpp_unaligned_alloc (pfile, accum.accum + extra_len + 1);
1.1  mrg
1.1  mrg       token->type = type;
1.1  mrg       token->val.str.len = accum.accum + extra_len;
1.1  mrg       token->val.str.text = dest;
1.1  mrg       for (_cpp_buff *buf = accum.first; buf; buf = buf->next)
1.1  mrg 	{
1.1  mrg 	  size_t len = BUFF_FRONT (buf) - buf->base;
1.1  mrg 	  memcpy (dest, buf->base, len);
1.1  mrg 	  dest += len;
1.1  mrg 	}
1.1  mrg       _cpp_release_buff (pfile, accum.first);
1.1  mrg       memcpy (dest, base, extra_len);
1.1  mrg       dest[extra_len] = '\0';
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Lexes a string, character constant, or angle-bracketed header file
1.1  mrg    name whose spelling starts at BASE.  The stored string contains the
1.1  mrg    spelling, including opening quote and any leading 'L', 'u', 'U' or
1.1  mrg    'u8' and optional 'R' modifier (when the 'R' modifier is seen the
1.1  mrg    whole job is handed to lex_raw_string).
1.1  mrg
1.1  mrg    Nothing is returned; the result is stored in TOKEN.  Its type is the
1.1  mrg    type of the literal, or CPP_OTHER if it was not properly terminated,
1.1  mrg    or CPP_LESS for an unterminated header name which must be relexed as
1.1  mrg    normal tokens.  In the CPP_LESS case the function returns before
1.1  mrg    pfile->buffer->cur is updated and before any literal is created.
1.1  mrg
1.1  mrg    The spelling is NUL-terminated, but it is not guaranteed that this
1.1  mrg    is the first NUL since embedded NULs are preserved.  */
1.1  mrg static void
1.1  mrg lex_string (cpp_reader *pfile, cpp_token *token, const uchar *base)
1.1  mrg {
1.1  mrg   bool saw_NUL = false;
1.1  mrg   const uchar *cur;
1.1  mrg   cppchar_t terminator;
1.1  mrg   enum cpp_ttype type;
1.1  mrg
1.1  mrg   cur = base;
1.1  mrg   terminator = *cur++;
1.1  mrg   if (terminator == 'L' || terminator == 'U')
1.1  mrg     terminator = *cur++;
1.1  mrg   else if (terminator == 'u')
1.1  mrg     {
1.1  mrg       terminator = *cur++;
1.1  mrg       if (terminator == '8')
1.1  mrg 	terminator = *cur++;
1.1  mrg     }
1.1  mrg   if (terminator == 'R')
1.1  mrg     {
1.1  mrg       lex_raw_string (pfile, token, base);
1.1  mrg       return;
1.1  mrg     }
1.1  mrg   if (terminator == '"')
1.1  mrg     type = (*base == 'L' ? CPP_WSTRING :
1.1  mrg 	    *base == 'U' ? CPP_STRING32 :
1.1  mrg 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8STRING : CPP_STRING16)
1.1  mrg 			 : CPP_STRING);
1.1  mrg   else if (terminator == '\'')
1.1  mrg     type = (*base == 'L' ? CPP_WCHAR :
1.1  mrg 	    *base == 'U' ? CPP_CHAR32 :
1.1  mrg 	    *base == 'u' ? (base[1] == '8' ? CPP_UTF8CHAR : CPP_CHAR16)
1.1  mrg 			 : CPP_CHAR);
1.1  mrg   else
1.1  mrg     terminator = '>', type = CPP_HEADER_NAME;
1.1  mrg
1.1  mrg   const bool warn_bidi_p = pfile->warn_bidi_p ();
1.1  mrg   for (;;)
1.1  mrg     {
1.1  mrg       cppchar_t c = *cur++;
1.1  mrg
1.1  mrg       /* In #include-style directives, terminators are not escapable.
1.1  mrg 	 Elsewhere a backslash that is not directly followed by a
1.1  mrg 	 newline makes the next character part of the literal, so that
1.1  mrg 	 character is stepped over without being examined as a
1.1  mrg 	 terminator.  */
1.1  mrg       if (c == '\\' && !pfile->state.angled_headers && *cur != '\n')
1.1  mrg 	{
1.1  mrg 	  if ((cur[0] == 'u' || cur[0] == 'U') && warn_bidi_p)
1.1  mrg 	    {
1.1  mrg 	      location_t loc;
1.1  mrg 	      bidi::kind kind = get_bidi_ucn (pfile, cur + 1, cur[0] == 'U',
1.1  mrg 					      &loc);
1.1  mrg 	      maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/true, loc);
1.1  mrg 	    }
1.1  mrg 	  cur++;
1.1  mrg 	}
1.1  mrg       else if (c == terminator)
1.1  mrg 	{
1.1  mrg 	  if (warn_bidi_p)
1.1  mrg 	    maybe_warn_bidi_on_close (pfile, cur - 1);
1.1  mrg 	  break;
1.1  mrg 	}
1.1  mrg       else if (c == '\n')
1.1  mrg 	{
1.1  mrg 	  cur--;
1.1  mrg 	  /* Unmatched quotes always yield undefined behavior, but
1.1  mrg 	     greedy lexing means that what appears to be an unterminated
1.1  mrg 	     header name may actually be a legitimate sequence of tokens.  */
1.1  mrg 	  if (terminator == '>')
1.1  mrg 	    {
1.1  mrg 	      token->type = CPP_LESS;
1.1  mrg 	      return;
1.1  mrg 	    }
1.1  mrg 	  type = CPP_OTHER;
1.1  mrg 	  break;
1.1  mrg 	}
1.1  mrg       else if (c == '\0')
1.1  mrg 	saw_NUL = true;
1.1  mrg       else if (__builtin_expect (c == bidi::utf8_start, 0) && warn_bidi_p)
1.1  mrg 	{
1.1  mrg 	  location_t loc;
1.1  mrg 	  bidi::kind kind = get_bidi_utf8 (pfile, cur - 1, &loc);
1.1  mrg 	  maybe_warn_bidi_on_char (pfile, kind, /*ucn_p=*/false, loc);
1.1  mrg 	}
1.1  mrg     }
1.1  mrg
1.1  mrg   if (saw_NUL && !pfile->state.skipping)
1.1  mrg     cpp_error (pfile, CPP_DL_WARNING,
1.1  mrg 	       "null character(s) preserved in literal");
1.1  mrg
1.1  mrg   if (type == CPP_OTHER && CPP_OPTION (pfile, lang) != CLK_ASM)
1.1  mrg     cpp_error (pfile, CPP_DL_PEDWARN, "missing terminating %c character",
1.1  mrg 	       (int) terminator);
1.1  mrg
1.1  mrg   if (CPP_OPTION (pfile, user_literals))
1.1  mrg     {
1.1  mrg       /* If a string format macro, say from inttypes.h, is placed touching
1.1  mrg 	 a string literal it could be parsed as a C++11 user-defined string
1.1  mrg 	 literal thus breaking the program.  */
1.1  mrg       if (is_macro_not_literal_suffix (pfile, cur))
1.1  mrg 	{
1.1  mrg 	  /* Raise a warning, but do not consume subsequent tokens.  */
1.1  mrg 	  if (CPP_OPTION (pfile, warn_literal_suffix) && !pfile->state.skipping)
1.1  mrg 	    cpp_warning_with_line (pfile, CPP_W_LITERAL_SUFFIX,
1.1  mrg 				   token->src_loc, 0,
1.1  mrg 				   "invalid suffix on literal; C++11 requires "
1.1  mrg 				   "a space between literal and string macro");
1.1  mrg 	}
1.1  mrg       /* Grab user defined literal suffix.  TYPE may be a character or a
1.1  mrg 	 string type here, so both conversions are applied (presumably each
1.1  mrg 	 leaves the other kind untouched -- confirm against their
1.1  mrg 	 definitions).  */
1.1  mrg       else if (ISIDST (*cur))
1.1  mrg 	{
1.1  mrg 	  type = cpp_userdef_char_add_type (type);
1.1  mrg 	  type = cpp_userdef_string_add_type (type);
1.1  mrg           ++cur;
1.1  mrg
1.1  mrg 	  while (ISIDNUM (*cur))
1.1  mrg 	    ++cur;
1.1  mrg 	}
1.1  mrg     }
1.1  mrg   else if (CPP_OPTION (pfile, cpp_warn_cxx11_compat)
1.1  mrg 	   && is_macro (pfile, cur)
1.1  mrg 	   && !pfile->state.skipping)
1.1  mrg     cpp_warning_with_line (pfile, CPP_W_CXX11_COMPAT,
1.1  mrg 			   token->src_loc, 0, "C++11 requires a space "
1.1  mrg 			   "between string literal and macro");
1.1  mrg
1.1  mrg   pfile->buffer->cur = cur;
1.1  mrg   create_literal (pfile, token, base, cur - base, type);
1.1  mrg }
1.1  mrg
1.1  mrg /* Hand back a pointer to PFILE's comment table.  Callers must not
1.1  mrg    depend on the entries being in any particular order.  */
1.1  mrg cpp_comment_table *
1.1  mrg cpp_get_comments (cpp_reader *pfile)
1.1  mrg {
1.1  mrg   cpp_comment_table *table = &pfile->comments;
1.1  mrg
1.1  mrg   return table;
1.1  mrg }
1.1  mrg
1.1  mrg /* Record TOKEN's comment text and source location as a new entry at
1.1  mrg    the end of PFILE's comment table, allocating or growing the table
1.1  mrg    as required.  */
1.1  mrg static void
1.1  mrg store_comment (cpp_reader *pfile, cpp_token *token)
1.1  mrg {
1.1  mrg   int len;
1.1  mrg
1.1  mrg   /* First use: start with room for 256 entries.  */
1.1  mrg   if (pfile->comments.allocated == 0)
1.1  mrg     {
1.1  mrg       pfile->comments.allocated = 256;
1.1  mrg       pfile->comments.entries
1.1  mrg 	= (cpp_comment *) xmalloc (pfile->comments.allocated
1.1  mrg 				   * sizeof (cpp_comment));
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Table full: double its capacity.  */
1.1  mrg   if (pfile->comments.count == pfile->comments.allocated)
1.1  mrg     {
1.1  mrg       pfile->comments.allocated *= 2;
1.1  mrg       pfile->comments.entries
1.1  mrg 	= (cpp_comment *) xrealloc (pfile->comments.entries,
1.1  mrg 				    pfile->comments.allocated
1.1  mrg 				    * sizeof (cpp_comment));
1.1  mrg     }
1.1  mrg
1.1  mrg   cpp_comment *slot = &pfile->comments.entries[pfile->comments.count];
1.1  mrg   len = token->val.str.len;
1.1  mrg
1.1  mrg   /* Take a private copy and terminate it ourselves, since the token
1.1  mrg      text may not be NUL terminated.  */
1.1  mrg   slot->comment = (char *) xmalloc (sizeof (char) * (len + 1));
1.1  mrg   memcpy (slot->comment, token->val.str.text, len);
1.1  mrg   slot->comment[len] = '\0';
1.1  mrg
1.1  mrg   /* Remember where the comment came from.  */
1.1  mrg   slot->sloc = token->src_loc;
1.1  mrg
1.1  mrg   /* The new entry is now part of the table.  */
1.1  mrg   pfile->comments.count++;
1.1  mrg }
1.1  mrg
1.1  mrg /* Store in TOKEN, as a CPP_COMMENT, the comment whose text runs from
1.1  mrg    FROM up to pfile->buffer->cur, then append it to the comment table
1.1  mrg    via store_comment.  The stored comment includes the comment start
1.1  mrg    and any terminator.  TYPE is compared against '/' to detect a C++
1.1  mrg    line comment, which is rewritten as a C block comment while in a
1.1  mrg    directive or while parsing macro arguments.  No terminating NUL is
1.1  mrg    written to the stored text.  */
1.1  mrg static void
1.1  mrg save_comment (cpp_reader *pfile, cpp_token *token, const unsigned char *from,
1.1  mrg 	      cppchar_t type)
1.1  mrg {
1.1  mrg   unsigned char *buffer;
1.2  mrg   unsigned int len, clen, i;
1.2  mrg   int convert_to_c = (pfile->state.in_directive || pfile->state.parsing_args)
1.1  mrg     && type == '/';
1.1  mrg
1.1  mrg   len = pfile->buffer->cur - from + 1; /* + 1 for the initial '/'.  */
1.1  mrg
1.1  mrg   /* C++ comments probably (not definitely) have moved past a new
1.1  mrg      line, which we don't want to save in the comment.  */
1.1  mrg   if (is_vspace (pfile->buffer->cur[-1]))
1.1  mrg     len--;
1.1  mrg
1.1  mrg   /* If we are currently in a directive or in argument parsing, then
1.1  mrg      we need to store all C++ comments as C comments internally, and
1.1  mrg      so we need to allocate a little extra space in that case (two
1.1  mrg      bytes, for the closing star and slash).
1.1  mrg
1.1  mrg      Note that the only time we encounter a directive here is
1.2  mrg      when we are saving comments in a "#define".  */
1.1  mrg   clen = convert_to_c ? len + 2 : len;
1.1  mrg
1.1  mrg   buffer = _cpp_unaligned_alloc (pfile, clen);
1.1  mrg
1.1  mrg   token->type = CPP_COMMENT;
1.1  mrg   token->val.str.len = clen;
1.1  mrg   token->val.str.text = buffer;
1.1  mrg
1.1  mrg   buffer[0] = '/';
1.1  mrg   memcpy (buffer + 1, from, len - 1);
1.1  mrg
1.2  mrg   /* Finish conversion to a C comment, if necessary: overwrite the
1.2  mrg      second character of the opener with a star and fill the two
1.2  mrg      extra bytes at the end with the closer.  */
1.1  mrg   if (convert_to_c)
1.1  mrg     {
1.1  mrg       buffer[1] = '*';
1.1  mrg       buffer[clen - 2] = '*';
1.1  mrg       buffer[clen - 1] = '/';
1.1  mrg       /* A C++ comment may contain character pairs that would open or
1.1  mrg          close a C comment.  Neutralize them by replacing any slash
1.1  mrg          that sits next to a star with '|'.  */
1.1  mrg       for (i = 2; i < (clen - 2); i++)
1.1  mrg         if (buffer[i] == '/' && (buffer[i - 1] == '*' || buffer[i + 1] == '*'))
1.1  mrg           buffer[i] = '|';
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Finally store this comment for use by clients of libcpp. */
1.1  mrg   store_comment (pfile, token);
1.1  mrg }
1.1  mrg
1.1  mrg /* Returns true if comment at COMMENT_START is a recognized FALLTHROUGH
1.1  mrg    comment.  COMMENT_START points at the second character of the
1.1  mrg    comment opener: a star there means a C block comment, anything else
1.1  mrg    is handled as a C++ line comment.  pfile->buffer->cur is used as the
1.1  mrg    upper bound of the comment text.
1.1  mrg
1.1  mrg    What is accepted depends on the cpp_warn_implicit_fallthrough level:
1.1  mrg      0, 5 and any unlisted value: no comment is recognized;
1.1  mrg      1: every comment is recognized;
1.1  mrg      2: any comment containing, case insensitively, "fall" optionally
1.1  mrg 	followed by "s", then spaces, tabs or dashes, then "thru" or
1.1  mrg 	"through";
1.1  mrg      3 and 4: the whole comment must be "-fallthrough", "@fallthrough@"
1.1  mrg 	or "lint -fallthrough"; in addition level 4 accepts only the
1.1  mrg 	upper case FALLTHRU / FALLTHROUGH spellings, while level 3
1.1  mrg 	accepts the more liberal forms listed in the body.  */
1.1  mrg
1.1  mrg static bool
1.1  mrg fallthrough_comment_p (cpp_reader *pfile, const unsigned char *comment_start)
1.1  mrg {
1.1  mrg   const unsigned char *from = comment_start + 1;
1.1  mrg
1.1  mrg   switch (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough))
1.1  mrg     {
1.1  mrg       /* For both -Wimplicit-fallthrough=0 and -Wimplicit-fallthrough=5 we
1.1  mrg 	 don't recognize any comments.  The latter only checks attributes,
1.1  mrg 	 the former doesn't warn.  */
1.1  mrg     case 0:
1.1  mrg     default:
1.1  mrg       return false;
1.1  mrg       /* -Wimplicit-fallthrough=1 considers any comment, no matter what
1.1  mrg 	 content it has.  */
1.1  mrg     case 1:
1.1  mrg       return true;
1.1  mrg     case 2:
1.1  mrg       /* -Wimplicit-fallthrough=2 looks for (case insensitive)
1.1  mrg 	 .*falls?[ \t-]*thr(u|ough).* regex.
1.1  mrg
1.1  mrg 	 The whitespace skip below can leave FROM on the last character
1.1  mrg 	 it examined, so the loop increment after a "continue" may step
1.1  mrg 	 FROM beyond pfile->buffer->cur.  The pointer difference would
1.1  mrg 	 then be negative and wrap to a huge size_t, keeping the scan
1.1  mrg 	 running past the comment; hence the explicit ordering check
1.1  mrg 	 ahead of the length test.  */
1.1  mrg       for (; (from <= pfile->buffer->cur
1.1  mrg 	      && ((size_t) (pfile->buffer->cur - from)
1.1  mrg 		  >= sizeof "fallthru" - 1));
1.1  mrg 	   from++)
1.1  mrg 	{
1.1  mrg 	  /* Is there anything like strpbrk with upper boundary, or
1.1  mrg 	     memchr looking for 2 characters rather than just one?  */
1.1  mrg 	  if (from[0] != 'f' && from[0] != 'F')
1.1  mrg 	    continue;
1.1  mrg 	  if (from[1] != 'a' && from[1] != 'A')
1.1  mrg 	    continue;
1.1  mrg 	  if (from[2] != 'l' && from[2] != 'L')
1.1  mrg 	    continue;
1.1  mrg 	  if (from[3] != 'l' && from[3] != 'L')
1.1  mrg 	    continue;
1.1  mrg 	  from += sizeof "fall" - 1;
1.1  mrg 	  if (from[0] == 's' || from[0] == 'S')
1.1  mrg 	    from++;
1.1  mrg 	  while (*from == ' ' || *from == '\t' || *from == '-')
1.1  mrg 	    from++;
1.1  mrg 	  if (from[0] != 't' && from[0] != 'T')
1.1  mrg 	    continue;
1.1  mrg 	  if (from[1] != 'h' && from[1] != 'H')
1.1  mrg 	    continue;
1.1  mrg 	  if (from[2] != 'r' && from[2] != 'R')
1.1  mrg 	    continue;
1.1  mrg 	  if (from[3] == 'u' || from[3] == 'U')
1.1  mrg 	    return true;
1.1  mrg 	  if (from[3] != 'o' && from[3] != 'O')
1.1  mrg 	    continue;
1.1  mrg 	  if (from[4] != 'u' && from[4] != 'U')
1.1  mrg 	    continue;
1.1  mrg 	  if (from[5] != 'g' && from[5] != 'G')
1.1  mrg 	    continue;
1.1  mrg 	  if (from[6] != 'h' && from[6] != 'H')
1.1  mrg 	    continue;
1.1  mrg 	  return true;
1.1  mrg 	}
1.1  mrg       return false;
1.1  mrg     case 3:
1.1  mrg     case 4:
1.1  mrg       break;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Whole comment contents:
1.1  mrg      -fallthrough
1.1  mrg      @fallthrough@
1.1  mrg    */
1.1  mrg   if (*from == '-' || *from == '@')
1.1  mrg     {
1.1  mrg       size_t len = sizeof "fallthrough" - 1;
1.1  mrg       if ((size_t) (pfile->buffer->cur - from - 1) < len)
1.1  mrg 	return false;
1.1  mrg       if (memcmp (from + 1, "fallthrough", len))
1.1  mrg 	return false;
1.1  mrg       if (*from == '@')
1.1  mrg 	{
1.1  mrg 	  if (from[len + 1] != '@')
1.1  mrg 	    return false;
1.1  mrg 	  len++;
1.1  mrg 	}
1.1  mrg       from += 1 + len;
1.1  mrg     }
1.1  mrg   /* Whole comment contents (regex):
1.1  mrg      lint -fallthrough[ \t]*
1.1  mrg    */
1.1  mrg   else if (*from == 'l')
1.1  mrg     {
1.1  mrg       size_t len = sizeof "int -fallthrough" - 1;
1.1  mrg       if ((size_t) (pfile->buffer->cur - from - 1) < len)
1.1  mrg 	return false;
1.1  mrg       if (memcmp (from + 1, "int -fallthrough", len))
1.1  mrg 	return false;
1.1  mrg       from += 1 + len;
1.1  mrg       while (*from == ' ' || *from == '\t')
1.1  mrg 	from++;
1.1  mrg     }
1.1  mrg   /* Whole comment contents (regex):
1.1  mrg      [ \t]*FALLTHR(U|OUGH)[ \t]*
1.1  mrg    */
1.1  mrg   else if (CPP_OPTION (pfile, cpp_warn_implicit_fallthrough) == 4)
1.1  mrg     {
1.1  mrg       while (*from == ' ' || *from == '\t')
1.1  mrg 	from++;
1.1  mrg       if ((size_t) (pfile->buffer->cur - from)  < sizeof "FALLTHRU" - 1)
1.1  mrg 	return false;
1.1  mrg       if (memcmp (from, "FALLTHR", sizeof "FALLTHR" - 1))
1.1  mrg 	return false;
1.1  mrg       from += sizeof "FALLTHR" - 1;
1.1  mrg       if (*from == 'U')
1.1  mrg 	from++;
1.1  mrg       else if ((size_t) (pfile->buffer->cur - from)  < sizeof "OUGH" - 1)
1.1  mrg 	return false;
1.1  mrg       else if (memcmp (from, "OUGH", sizeof "OUGH" - 1))
1.1  mrg 	return false;
1.1  mrg       else
1.1  mrg 	from += sizeof "OUGH" - 1;
1.1  mrg       while (*from == ' ' || *from == '\t')
1.1  mrg 	from++;
1.1  mrg     }
1.1  mrg   /* Whole comment contents (regex):
1.1  mrg      [ \t.!]*(ELSE,? |INTENTIONAL(LY)? )?FALL(S | |-)?THR(OUGH|U)[ \t.!]*(-[^\n\r]*)?
1.1  mrg      [ \t.!]*(Else,? |Intentional(ly)? )?Fall((s | |-)[Tt]|t)hr(ough|u)[ \t.!]*(-[^\n\r]*)?
1.1  mrg      [ \t.!]*([Ee]lse,? |[Ii]ntentional(ly)? )?fall(s | |-)?thr(ough|u)[ \t.!]*(-[^\n\r]*)?
1.1  mrg    */
1.1  mrg   else
1.1  mrg     {
1.1  mrg       while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
1.1  mrg 	from++;
1.1  mrg       unsigned char f = *from;
1.1  mrg       bool all_upper = false;
1.1  mrg       if (f == 'E' || f == 'e')
1.1  mrg 	{
1.1  mrg 	  if ((size_t) (pfile->buffer->cur - from)
1.1  mrg 	      < sizeof "else fallthru" - 1)
1.1  mrg 	    return false;
1.1  mrg 	  if (f == 'E' && memcmp (from + 1, "LSE", sizeof "LSE" - 1) == 0)
1.1  mrg 	    all_upper = true;
1.1  mrg 	  else if (memcmp (from + 1, "lse", sizeof "lse" - 1))
1.1  mrg 	    return false;
1.1  mrg 	  from += sizeof "else" - 1;
1.1  mrg 	  if (*from == ',')
1.1  mrg 	    from++;
1.1  mrg 	  if (*from != ' ')
1.1  mrg 	    return false;
1.1  mrg 	  from++;
1.1  mrg 	  if (all_upper && *from == 'f')
1.1  mrg 	    return false;
1.1  mrg 	  if (f == 'e' && *from == 'F')
1.1  mrg 	    return false;
1.1  mrg 	  f = *from;
1.1  mrg 	}
1.1  mrg       else if (f == 'I' || f == 'i')
1.1  mrg 	{
1.1  mrg 	  if ((size_t) (pfile->buffer->cur - from)
1.1  mrg 	      < sizeof "intentional fallthru" - 1)
1.1  mrg 	    return false;
1.1  mrg 	  if (f == 'I' && memcmp (from + 1, "NTENTIONAL",
1.1  mrg 				  sizeof "NTENTIONAL" - 1) == 0)
1.1  mrg 	    all_upper = true;
1.1  mrg 	  else if (memcmp (from + 1, "ntentional",
1.1  mrg 			   sizeof "ntentional" - 1))
1.1  mrg 	    return false;
1.1  mrg 	  from += sizeof "intentional" - 1;
1.1  mrg 	  if (*from == ' ')
1.1  mrg 	    {
1.1  mrg 	      from++;
1.1  mrg 	      if (all_upper && *from == 'f')
1.1  mrg 		return false;
1.1  mrg 	    }
1.1  mrg 	  else if (all_upper)
1.1  mrg 	    {
1.1  mrg 	      if (memcmp (from, "LY F", sizeof "LY F" - 1))
1.1  mrg 		return false;
1.1  mrg 	      from += sizeof "LY " - 1;
1.1  mrg 	    }
1.1  mrg 	  else
1.1  mrg 	    {
1.1  mrg 	      if (memcmp (from, "ly ", sizeof "ly " - 1))
1.1  mrg 		return false;
1.1  mrg 	      from += sizeof "ly " - 1;
1.1  mrg 	    }
1.1  mrg 	  if (f == 'i' && *from == 'F')
1.1  mrg 	    return false;
1.1  mrg 	  f = *from;
1.1  mrg 	}
1.1  mrg       if (f != 'F' && f != 'f')
1.1  mrg 	return false;
1.1  mrg       if ((size_t) (pfile->buffer->cur - from) < sizeof "fallthru" - 1)
1.1  mrg 	return false;
1.1  mrg       if (f == 'F' && memcmp (from + 1, "ALL", sizeof "ALL" - 1) == 0)
1.1  mrg 	all_upper = true;
1.1  mrg       else if (all_upper)
1.1  mrg 	return false;
1.1  mrg       else if (memcmp (from + 1, "all", sizeof "all" - 1))
1.1  mrg 	return false;
1.1  mrg       from += sizeof "fall" - 1;
1.1  mrg       if (*from == (all_upper ? 'S' : 's') && from[1] == ' ')
1.1  mrg 	from += 2;
1.1  mrg       else if (*from == ' ' || *from == '-')
1.1  mrg 	from++;
1.1  mrg       else if (*from != (all_upper ? 'T' : 't'))
1.1  mrg 	return false;
1.1  mrg       if ((f == 'f' || *from != 'T') && (all_upper || *from != 't'))
1.1  mrg 	return false;
1.1  mrg       if ((size_t) (pfile->buffer->cur - from) < sizeof "thru" - 1)
1.1  mrg 	return false;
1.1  mrg       if (memcmp (from + 1, all_upper ? "HRU" : "hru", sizeof "hru" - 1))
1.1  mrg 	{
1.1  mrg 	  if ((size_t) (pfile->buffer->cur - from) < sizeof "through" - 1)
1.1  mrg 	    return false;
1.1  mrg 	  if (memcmp (from + 1, all_upper ? "HROUGH" : "hrough",
1.1  mrg 		      sizeof "hrough" - 1))
1.1  mrg 	    return false;
1.1  mrg 	  from += sizeof "through" - 1;
1.1  mrg 	}
1.1  mrg       else
1.1  mrg 	from += sizeof "thru" - 1;
1.1  mrg       while (*from == ' ' || *from == '\t' || *from == '.' || *from == '!')
1.1  mrg 	from++;
1.1  mrg       if (*from == '-')
1.1  mrg 	{
1.1  mrg 	  from++;
1.1  mrg 	  if (*comment_start == '*')
1.1  mrg 	    {
1.1  mrg 	      do
1.1  mrg 		{
1.1  mrg 		  while (*from && *from != '*'
1.1  mrg 			 && *from != '\n' && *from != '\r')
1.1  mrg 		    from++;
1.1  mrg 		  if (*from != '*' || from[1] == '/')
1.1  mrg 		    break;
1.1  mrg 		  from++;
1.1  mrg 		}
1.1  mrg 	      while (1);
1.1  mrg 	    }
1.1  mrg 	  else
1.1  mrg 	    while (*from && *from != '\n' && *from != '\r')
1.1  mrg 	      from++;
1.1  mrg 	}
1.1  mrg     }
1.1  mrg   /* C block comment: the match must be followed directly by the
1.1  mrg      comment terminator.  */
1.1  mrg   if (*comment_start == '*')
1.1  mrg     {
1.1  mrg       if (*from != '*' || from[1] != '/')
1.1  mrg 	return false;
1.1  mrg     }
1.1  mrg   /* C++ line comment: the match must run up to the newline.  */
1.1  mrg   else if (*from != '\n')
1.1  mrg     return false;
1.1  mrg
1.1  mrg   return true;
1.1  mrg }
1.1  mrg
1.1  mrg /* Give RUN a freshly allocated array of COUNT tokens and mark it as
1.1  mrg    having no successor run.  */
1.1  mrg void
1.1  mrg _cpp_init_tokenrun (tokenrun *run, unsigned int count)
1.1  mrg {
1.1  mrg   cpp_token *tokens = XNEWVEC (cpp_token, count);
1.1  mrg
1.1  mrg   run->next = NULL;
1.1  mrg   run->base = tokens;
1.1  mrg   run->limit = tokens + count;
1.1  mrg }
1.1  mrg
1.1  mrg /* Returns the run that follows RUN.  When RUN has no successor yet,
1.1  mrg    one holding 250 tokens is created and linked in both directions.  */
1.1  mrg static tokenrun *
1.1  mrg next_tokenrun (tokenrun *run)
1.1  mrg {
1.1  mrg   tokenrun *succ = run->next;
1.1  mrg
1.1  mrg   if (succ != NULL)
1.1  mrg     return succ;
1.1  mrg
1.1  mrg   succ = XNEW (tokenrun);
1.1  mrg   run->next = succ;
1.1  mrg   succ->prev = run;
1.1  mrg   _cpp_init_tokenrun (succ, 250);
1.1  mrg   return succ;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return the number of tokens in CONTEXT that have not been processed
1.1  mrg    yet.  Aborts on an unknown kind of context.  */
1.1  mrg int
1.1  mrg _cpp_remaining_tokens_num_in_context (cpp_context *context)
1.1  mrg {
1.1  mrg   switch (context->tokens_kind)
1.1  mrg     {
1.1  mrg     case TOKENS_KIND_DIRECT:
1.1  mrg       /* The context holds the tokens themselves.  */
1.1  mrg       return (LAST (context).token - FIRST (context).token);
1.1  mrg
1.1  mrg     case TOKENS_KIND_INDIRECT:
1.1  mrg     case TOKENS_KIND_EXTENDED:
1.1  mrg       /* The context holds pointers to tokens.  */
1.1  mrg       return (LAST (context).ptoken - FIRST (context).ptoken);
1.1  mrg
1.1  mrg     default:
1.1  mrg       abort ();
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Returns the token found INDEX positions into the unprocessed part
1.1  mrg    of CONTEXT; an INDEX of zero designates the next token to be
1.1  mrg    processed.  Aborts on an unknown kind of context.  */
1.1  mrg static const cpp_token*
1.1  mrg _cpp_token_from_context_at (cpp_context *context, int index)
1.1  mrg {
1.1  mrg   switch (context->tokens_kind)
1.1  mrg     {
1.1  mrg     case TOKENS_KIND_DIRECT:
1.1  mrg       /* The context holds the tokens themselves.  */
1.1  mrg       return &(FIRST (context).token[index]);
1.1  mrg
1.1  mrg     case TOKENS_KIND_INDIRECT:
1.1  mrg     case TOKENS_KIND_EXTENDED:
1.1  mrg       /* The context holds pointers to tokens.  */
1.1  mrg       return FIRST (context).ptoken[index];
1.1  mrg
1.1  mrg     default:
1.1  mrg       abort ();
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Look ahead in the input stream.  Returns the token INDEX positions
1.1  mrg    ahead, where an INDEX of zero designates the next token.  Tokens
1.1  mrg    still pending in stacked macro contexts are counted first; if INDEX
1.1  mrg    reaches beyond them, new tokens are lexed and afterwards pushed back
1.1  mrg    with _cpp_backup_tokens_direct.  Lexing stops early at a CPP_EOF or
1.1  mrg    CPP_PRAGMA token, in which case that token is returned instead.  */
1.1  mrg const cpp_token *
1.1  mrg cpp_peek_token (cpp_reader *pfile, int index)
1.1  mrg {
1.1  mrg   cpp_context *context = pfile->context;
1.1  mrg   const cpp_token *peektok;
1.1  mrg   int count;
1.1  mrg
1.1  mrg   /* First, scan through any pending cpp_context objects.  INDEX is
1.1  mrg      reduced by the size of every context that is skipped entirely.  */
1.1  mrg   while (context->prev)
1.1  mrg     {
1.1  mrg       ptrdiff_t sz = _cpp_remaining_tokens_num_in_context (context);
1.1  mrg
1.1  mrg       if (index < (int) sz)
1.1  mrg         return _cpp_token_from_context_at (context, index);
1.1  mrg       index -= (int) sz;
1.1  mrg       context = context->prev;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* We will have to read some new tokens after all (and do so
1.1  mrg      without invalidating preceding tokens).  COUNT remembers the
1.1  mrg      starting INDEX; INDEX is decremented once per token lexed below,
1.1  mrg      including on the early exits, so COUNT - INDEX ends up being the
1.1  mrg      number of tokens to push back.  */
1.1  mrg   count = index;
1.1  mrg   pfile->keep_tokens++;
1.1  mrg
1.1  mrg   /* For peeked tokens temporarily disable line_change reporting,
1.1  mrg      until the tokens are parsed for real.  */
1.1  mrg   void (*line_change) (cpp_reader *, const cpp_token *, int)
1.1  mrg     = pfile->cb.line_change;
1.1  mrg   pfile->cb.line_change = NULL;
1.1  mrg
1.1  mrg   do
1.1  mrg     {
1.1  mrg       peektok = _cpp_lex_token (pfile);
1.1  mrg       if (peektok->type == CPP_EOF)
1.1  mrg 	{
1.1  mrg 	  index--;
1.1  mrg 	  break;
1.1  mrg 	}
1.1  mrg       else if (peektok->type == CPP_PRAGMA)
1.1  mrg 	{
1.1  mrg 	  /* Don't peek past a pragma.  */
1.1  mrg 	  if (peektok == &pfile->directive_result)
1.1  mrg 	    /* Save the pragma in the buffer.  */
1.1  mrg 	    *pfile->cur_token++ = *peektok;
1.1  mrg 	  index--;
1.1  mrg 	  break;
1.1  mrg 	}
1.1  mrg     }
1.1  mrg   while (index--);
1.1  mrg
1.1  mrg   _cpp_backup_tokens_direct (pfile, count - index);
1.1  mrg   pfile->keep_tokens--;
1.1  mrg   pfile->cb.line_change = line_change;
1.1  mrg
1.1  mrg   return peektok;
1.1  mrg }
1.1  mrg
1.1  mrg /* Allocate a single token that is invalidated at the same time as the
1.1  mrg    rest of the tokens on the line.  Has its line and col set to the
1.1  mrg    same as the last lexed token, so that diagnostics appear in the
1.1  mrg    right place.  The new token is taken from the slot at
1.1  mrg    pfile->cur_token, which is advanced past it; only its src_loc field
1.1  mrg    is initialized here.  */
1.1  mrg cpp_token *
1.1  mrg _cpp_temp_token (cpp_reader *pfile)
1.1  mrg {
1.1  mrg   cpp_token *old, *result;
1.1  mrg   ptrdiff_t sz = pfile->cur_run->limit - pfile->cur_token;
1.1  mrg   ptrdiff_t la = (ptrdiff_t) pfile->lookaheads;
1.1  mrg
1.1  mrg   old = pfile->cur_token - 1;
1.1  mrg   /* Any pre-existing lookaheads must not be clobbered.  They are
1.1  mrg      shifted up by one slot to open a gap at cur_token.  When the
1.1  mrg      current run has no spare slot for that (SZ <= LA), the last token
1.1  mrg      of the current run is moved to the start of the next run, after
1.1  mrg      first shifting any lookaheads already stored there.  */
1.1  mrg   if (la)
1.1  mrg     {
1.1  mrg       if (sz <= la)
1.1  mrg         {
1.1  mrg           tokenrun *next = next_tokenrun (pfile->cur_run);
1.1  mrg
1.1  mrg           if (sz < la)
1.1  mrg             memmove (next->base + 1, next->base,
1.1  mrg                      (la - sz) * sizeof (cpp_token));
1.1  mrg
1.1  mrg           next->base[0] = pfile->cur_run->limit[-1];
1.1  mrg         }
1.1  mrg
1.1  mrg       if (sz > 1)
1.1  mrg         memmove (pfile->cur_token + 1, pfile->cur_token,
1.1  mrg                  MIN (la, sz - 1) * sizeof (cpp_token));
1.1  mrg     }
1.1  mrg
1.1  mrg   if (!sz && pfile->cur_token == pfile->cur_run->limit)
1.1  mrg     {
1.1  mrg       pfile->cur_run = next_tokenrun (pfile->cur_run);
1.1  mrg       pfile->cur_token = pfile->cur_run->base;
1.1  mrg     }
1.1  mrg
1.1  mrg   result = pfile->cur_token++;
1.1  mrg   result->src_loc = old->src_loc;
1.1  mrg   return result;
1.1  mrg }
1.1  mrg
1.1  mrg /* We're at the beginning of a logical line (so not in
1.1  mrg   directives-mode) and RESULT is a CPP_NAME with NODE_MODULE set.  See
1.1  mrg   if we should enter deferred_pragma mode to tokenize the rest of the
1.1  mrg   line as a module control-line.
1.1  mrg
1.1  mrg   Up to two further tokens are lexed to decide.  If the line is a
1.1  mrg   module control-line, the keyword nodes are replaced by their
1.1  mrg   n_modules[...][1] variants and deferred_pragma mode is left on;
1.1  mrg   otherwise the lexer state changed here is restored.  In both cases
1.1  mrg   the peeked tokens are pushed back.  */
1.1  mrg
1.1  mrg static void
1.1  mrg cpp_maybe_module_directive (cpp_reader *pfile, cpp_token *result)
1.1  mrg {
1.1  mrg   unsigned backup = 0; /* Tokens we peeked.  */
1.1  mrg   cpp_hashnode *node = result->val.node.node;
1.1  mrg   cpp_token *peek = result;
1.1  mrg   cpp_token *keyword = peek;
1.1  mrg   cpp_hashnode *(&n_modules)[spec_nodes::M_HWM][2] = pfile->spec_nodes.n_modules;
1.1  mrg   int header_count = 0;
1.1  mrg
1.1  mrg   /* Make sure the incoming state is as we expect it.  This way we
1.1  mrg      can restore it using constants.  */
1.1  mrg   gcc_checking_assert (!pfile->state.in_deferred_pragma
1.1  mrg 		       && !pfile->state.skipping
1.1  mrg 		       && !pfile->state.parsing_args
1.1  mrg 		       && !pfile->state.angled_headers
1.1  mrg 		       && (pfile->state.save_comments
1.1  mrg 			   == !CPP_OPTION (pfile, discard_comments)));
1.1  mrg
1.1  mrg   /* Enter directives mode sufficiently for peeking.  We don't have
1.1  mrg      to actually set in_directive.  */
1.1  mrg   pfile->state.in_deferred_pragma = true;
1.1  mrg
1.1  mrg   /* These two fields are needed to process tokenization in deferred
1.1  mrg      pragma mode.  They are not used outside deferred pragma mode or
1.1  mrg      directives mode.  */
1.1  mrg   pfile->state.pragma_allow_expansion = true;
1.1  mrg   pfile->directive_line = result->src_loc;
1.1  mrg
1.1  mrg   /* Saving comments is incompatible with directives mode.   */
1.1  mrg   pfile->state.save_comments = 0;
1.1  mrg
1.1  mrg   if (node == n_modules[spec_nodes::M_EXPORT][0])
1.1  mrg     {
1.1  mrg       peek = _cpp_lex_direct (pfile);
1.1  mrg       keyword = peek;
1.1  mrg       backup++;
1.1  mrg       if (keyword->type != CPP_NAME)
1.1  mrg 	goto not_module;
1.1  mrg       node = keyword->val.node.node;
1.1  mrg       if (!(node->flags & NODE_MODULE))
1.1  mrg 	goto not_module;
1.1  mrg     }
1.1  mrg
1.1  mrg   if (node == n_modules[spec_nodes::M__IMPORT][0])
1.1  mrg     /* __import  */
1.1  mrg     header_count = backup + 2 + 16;
1.1  mrg   else if (node == n_modules[spec_nodes::M_IMPORT][0])
1.1  mrg     /* import  */
1.1  mrg     header_count = backup + 2 + (CPP_OPTION (pfile, preprocessed) ? 16 : 0);
1.1  mrg   else if (node == n_modules[spec_nodes::M_MODULE][0])
1.1  mrg     ; /* module  */
1.1  mrg   else
1.1  mrg     goto not_module;
1.1  mrg
1.1  mrg   /* We've seen [export] {module|import|__import}.  Check the next token.  */
1.1  mrg   if (header_count)
1.1  mrg     /* After '{,__}import' a header name may appear.  */
1.1  mrg     pfile->state.angled_headers = true;
1.1  mrg   peek = _cpp_lex_direct (pfile);
1.1  mrg   backup++;
1.1  mrg
1.1  mrg   /* ... import followed by identifier, ':', '<' or
1.1  mrg      header-name preprocessing tokens, or module
1.1  mrg      followed by cpp-identifier, ':' or ';' preprocessing
1.1  mrg      tokens.  C++ keywords are not yet relevant.  */
1.1  mrg   if (peek->type == CPP_NAME
1.1  mrg       || peek->type == CPP_COLON
1.1  mrg       ||  (header_count
1.1  mrg 	   ? (peek->type == CPP_LESS
1.1  mrg 	      || (peek->type == CPP_STRING && peek->val.str.text[0] != 'R')
1.1  mrg 	      || peek->type == CPP_HEADER_NAME)
1.1  mrg 	   : peek->type == CPP_SEMICOLON))
1.1  mrg     {
1.1  mrg       pfile->state.pragma_allow_expansion = !CPP_OPTION (pfile, preprocessed);
1.1  mrg       if (!pfile->state.pragma_allow_expansion)
1.1  mrg 	pfile->state.prevent_expansion++;
1.1  mrg
1.1  mrg       if (!header_count && linemap_included_from
1.1  mrg 	  (LINEMAPS_LAST_ORDINARY_MAP (pfile->line_table)))
1.1  mrg 	cpp_error_with_line (pfile, CPP_DL_ERROR, keyword->src_loc, 0,
1.1  mrg 			     "module control-line cannot be in included file");
1.1  mrg
1.1  mrg       /* The first one or two tokens cannot be macro names.  */
1.1  mrg       for (int ix = backup; ix--;)
1.1  mrg 	{
1.1  mrg 	  cpp_token *tok = ix ? keyword : result;
1.1  mrg 	  cpp_hashnode *node = tok->val.node.node;
1.1  mrg
1.1  mrg 	  /* Don't attempt to expand the token.  */
1.1  mrg 	  tok->flags |= NO_EXPAND;
1.1  mrg 	  if (_cpp_defined_macro_p (node)
1.1  mrg 	      && _cpp_maybe_notify_macro_use (pfile, node, tok->src_loc)
1.1  mrg 	      && !cpp_fun_like_macro_p (node))
1.1  mrg 	    cpp_error_with_line (pfile, CPP_DL_ERROR, tok->src_loc, 0,
1.1  mrg 				 "module control-line \"%s\" cannot be"
1.1  mrg 				 " an object-like macro",
1.1  mrg 				 NODE_NAME (node));
1.1  mrg 	}
1.1  mrg
1.1  mrg       /* Map to underbar variants.  */
1.1  mrg       keyword->val.node.node = n_modules[header_count
1.1  mrg 					 ? spec_nodes::M_IMPORT
1.1  mrg 					 : spec_nodes::M_MODULE][1];
1.1  mrg       if (backup != 1)
1.1  mrg 	result->val.node.node = n_modules[spec_nodes::M_EXPORT][1];
1.1  mrg
1.1  mrg       /* Maybe tell the tokenizer we expect a header-name down the
1.1  mrg 	 road.  */
1.1  mrg       pfile->state.directive_file_token = header_count;
1.1  mrg     }
1.1  mrg   else
1.1  mrg     {
1.1  mrg     not_module:
1.1  mrg       /* Drop out of directive mode.  */
1.1  mrg       /* We asserted save_comments had this value upon entry.  */
1.1  mrg       pfile->state.save_comments
1.1  mrg 	= !CPP_OPTION (pfile, discard_comments);
1.1  mrg       pfile->state.in_deferred_pragma = false;
1.1  mrg       /* Do not let this remain on.  */
1.1  mrg       pfile->state.angled_headers = false;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* In either case we want to backup the peeked tokens.  */
1.1  mrg   if (backup)
1.1  mrg     {
1.1  mrg       /* If we saw EOL, we should drop it, because this isn't a module
1.1  mrg 	 control-line after all.  */
1.1  mrg       bool eol = peek->type == CPP_PRAGMA_EOL;
1.1  mrg       if (!eol || backup > 1)
1.1  mrg 	{
1.1  mrg 	  /* Put the peeked tokens back.  */
1.1  mrg 	  _cpp_backup_tokens_direct (pfile, backup);
1.1  mrg 	  /* But if the last one was an EOL, forget it.  */
1.1  mrg 	  if (eol)
1.1  mrg 	    pfile->lookaheads--;
1.1  mrg 	}
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Lex a token into RESULT (external interface).  Takes care of issues
1.1  mrg    like directive handling, token lookahead, multiple include
1.1  mrg    optimization and skipping.  Returns a pointer to the token, which
1.1  mrg    is either a slot in the current token run or
1.1  mrg    pfile->directive_result.  While pfile->state.skipping is set, tokens
1.1  mrg    outside directives are discarded until a CPP_EOF is reached.  */
1.1  mrg const cpp_token *
1.1  mrg _cpp_lex_token (cpp_reader *pfile)
1.1  mrg {
1.1  mrg   cpp_token *result;
1.1  mrg
1.1  mrg   for (;;)
1.1  mrg     {
1.1  mrg       if (pfile->cur_token == pfile->cur_run->limit)
1.1  mrg 	{
1.1  mrg 	  pfile->cur_run = next_tokenrun (pfile->cur_run);
1.1  mrg 	  pfile->cur_token = pfile->cur_run->base;
1.1  mrg 	}
1.1  mrg       /* We assume that the current token is somewhere in the current
1.1  mrg 	 run.  */
1.1  mrg       if (pfile->cur_token < pfile->cur_run->base
1.1  mrg 	  || pfile->cur_token >= pfile->cur_run->limit)
1.1  mrg 	abort ();
1.1  mrg
1.1  mrg       if (pfile->lookaheads)
1.1  mrg 	{
1.1  mrg 	  pfile->lookaheads--;
1.1  mrg 	  result = pfile->cur_token++;
1.1  mrg 	}
1.1  mrg       else
1.1  mrg 	result = _cpp_lex_direct (pfile);
1.1  mrg
1.1  mrg       if (result->flags & BOL)
1.1  mrg 	{
1.1  mrg 	  /* Is this a directive.  If _cpp_handle_directive returns
1.1  mrg 	     false, it is an assembler #.  */
1.1  mrg 	  if (result->type == CPP_HASH
1.1  mrg 	      /* 6.10.3 p 11: Directives in a list of macro arguments
1.1  mrg 		 gives undefined behavior.  This implementation
1.1  mrg 		 handles the directive as normal.  */
1.1  mrg 	      && pfile->state.parsing_args != 1)
1.1  mrg 	    {
1.1  mrg 	      if (_cpp_handle_directive (pfile, result->flags & PREV_WHITE))
1.1  mrg 		{
1.1  mrg 		  if (pfile->directive_result.type == CPP_PADDING)
1.1  mrg 		    continue;
1.1  mrg 		  result = &pfile->directive_result;
1.1  mrg 		}
1.1  mrg 	    }
1.1  mrg 	  else if (pfile->state.in_deferred_pragma)
1.1  mrg 	    result = &pfile->directive_result;
1.1  mrg 	  else if (result->type == CPP_NAME
1.1  mrg 		   && (result->val.node.node->flags & NODE_MODULE)
1.1  mrg 		   && !pfile->state.skipping
1.1  mrg 		   /* Unlike regular directives, we do not deal with
1.1  mrg 		      tokenizing module directives as macro arguments.
1.1  mrg 		      That's not permitted.  */
1.1  mrg 		   && !pfile->state.parsing_args)
1.1  mrg 	    {
1.1  mrg 	      /* P1857.  Before macro expansion, At start of logical
1.1  mrg 		 line ... */
1.1  mrg 	      /* We don't have to consider lookaheads at this point.  */
1.1  mrg 	      gcc_checking_assert (!pfile->lookaheads);
1.1  mrg
1.1  mrg 	      cpp_maybe_module_directive (pfile, result);
1.1  mrg 	    }
1.1  mrg
1.1  mrg 	  if (pfile->cb.line_change && !pfile->state.skipping)
1.1  mrg 	    pfile->cb.line_change (pfile, result, pfile->state.parsing_args);
1.1  mrg 	}
1.1  mrg
1.1  mrg       /* We don't skip tokens in directives.  */
1.1  mrg       if (pfile->state.in_directive || pfile->state.in_deferred_pragma)
1.1  mrg 	break;
1.1  mrg
1.1  mrg       /* Outside a directive, invalidate controlling macros.  At file
1.1  mrg 	 EOF, _cpp_lex_direct takes care of popping the buffer, so we never
1.1  mrg 	 get here and MI optimization works.  */
1.1  mrg       pfile->mi_valid = false;
1.1  mrg
1.1  mrg       /* Hand the token back unless we are skipping; even then, CPP_EOF
1.1  mrg 	 is always returned.  */
1.1  mrg       if (!pfile->state.skipping || result->type == CPP_EOF)
1.1  mrg 	break;
1.1  mrg     }
1.1  mrg
1.1  mrg   return result;
1.1  mrg }
1.1  mrg
1.1  mrg /* Make sure the current buffer has a line ready to be lexed.
1.1  mrg
1.1  mrg    Returns true if pfile->buffer has a line available: either one was
1.1  mrg    already pending (need_line clear) or _cpp_clean_line has just
1.1  mrg    prepared the next one.  Returns false if no line can be supplied:
1.1  mrg    while inside a directive, at the end of a buffer while
1.1  mrg    pfile->state.parsing_args is set, or at the end of a buffer that
1.1  mrg    is not popped here (no previous buffer, or return_at_eof set).
1.1  mrg    Exhausted buffers that may be popped are popped and the search
1.1  mrg    continues in the buffer beneath.  */
1.1  mrg bool
1.1  mrg _cpp_get_fresh_line (cpp_reader *pfile)
1.1  mrg {
1.1  mrg   /* No new line can be fetched before the current directive ends.  */
1.1  mrg   if (pfile->state.in_directive)
1.1  mrg     return false;
1.1  mrg
1.1  mrg   while (true)
1.1  mrg     {
1.1  mrg       /* Re-read each time round: popping changes pfile->buffer.  */
1.1  mrg       cpp_buffer *cur = pfile->buffer;
1.1  mrg
1.1  mrg       /* A line is already loaded.  */
1.1  mrg       if (!cur->need_line)
1.1  mrg 	return true;
1.1  mrg
1.1  mrg       /* More text remains in this buffer: prepare its next line.  */
1.1  mrg       if (cur->next_line < cur->rlimit)
1.1  mrg 	{
1.1  mrg 	  _cpp_clean_line (pfile);
1.1  mrg 	  return true;
1.1  mrg 	}
1.1  mrg
1.1  mrg       /* The buffer is exhausted.  Leave macro-argument collection
1.1  mrg 	 before doing anything else.  */
1.1  mrg       if (pfile->state.parsing_args)
1.1  mrg 	return false;
1.1  mrg
1.1  mrg       /* Non-empty files should end in a newline; if next_line has run
1.1  mrg 	 past the end of such a buffer, clip it to the buffer size.  */
1.1  mrg       const bool past_end = (cur->buf != cur->rlimit
1.1  mrg 			     && cur->next_line > cur->rlimit
1.1  mrg 			     && !cur->from_stage3);
1.1  mrg       if (past_end)
1.1  mrg 	cur->next_line = cur->rlimit;
1.1  mrg
1.1  mrg       if (!cur->prev || cur->return_at_eof)
1.1  mrg 	{
1.1  mrg 	  /* End of translation.  The buffer is deliberately not popped
1.1  mrg 	     yet.  Bump the line number so that the EOF token sits on a
1.1  mrg 	     line of its own (_cpp_lex_direct does not increment in this
1.1  mrg 	     case, because it cannot easily tell it apart).  */
1.1  mrg 	  CPP_INCREMENT_LINE (pfile, 0);
1.1  mrg 	  return false;
1.1  mrg 	}
1.1  mrg
1.1  mrg       _cpp_pop_buffer (pfile);
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* If the next unread character is CHAR, consume it and give the
1.1  mrg    token type THEN_TYPE; otherwise consume nothing and give it type
1.1  mrg    ELSE_TYPE.  Expects variables named `buffer' and `result' to be in
1.1  mrg    scope at the point of use.  */
1.1  mrg #define IF_NEXT_IS(CHAR, THEN_TYPE, ELSE_TYPE)		\
1.1  mrg   do							\
1.1  mrg     {							\
1.1  mrg       if (*buffer->cur == CHAR)				\
1.1  mrg 	{						\
1.1  mrg 	  buffer->cur++;				\
1.1  mrg 	  result->type = THEN_TYPE;			\
1.1  mrg 	}						\
1.1  mrg       else						\
1.1  mrg 	result->type = ELSE_TYPE;			\
1.1  mrg     }							\
1.1  mrg   while (0)
1.1  mrg
1.1  mrg /* Lex a token into pfile->cur_token, which is also incremented, to
1.1  mrg    get diagnostics pointing to the correct location.
1.1  mrg
1.1  mrg    Does not handle issues such as token lookahead, multiple-include
1.1  mrg    optimization, directives, skipping etc.  This function is only
1.1  mrg    suitable for use by _cpp_lex_token, and in special cases like
1.1  mrg    lex_expansion_token which doesn't care for any of these issues.
1.1  mrg
1.1  mrg    When meeting a newline, returns CPP_EOF if parsing a directive,
1.1  mrg    otherwise returns to the start of the token buffer if permissible.
1.1  mrg    Inside a deferred pragma a newline produces CPP_PRAGMA_EOL instead.
1.1  mrg
1.1  mrg    Returns a pointer to the lexed token.  Its src_loc member holds the
1.1  mrg    token's location; for tokens other than CPP_EOF whose location is
1.1  mrg    not a reserved one, that location is combined with a source range
1.1  mrg    running from the start of the token to the current buffer
1.1  mrg    position.  */
1.1  mrg cpp_token *
1.1  mrg _cpp_lex_direct (cpp_reader *pfile)
1.1  mrg {
1.1  mrg   cppchar_t c;
1.1  mrg   cpp_buffer *buffer;
1.1  mrg   const unsigned char *comment_start;
1.1  mrg   bool fallthrough_comment = false;
1.1  mrg   cpp_token *result = pfile->cur_token++;
1.1  mrg
1.1  mrg  fresh_line: /* Entry point, and re-entered after each newline that
1.1  mrg 		does not end a deferred pragma.  */
1.1  mrg   result->flags = 0;
1.1  mrg   buffer = pfile->buffer;
1.1  mrg   if (buffer->need_line)
1.1  mrg     {
1.1  mrg       if (pfile->state.in_deferred_pragma)
1.1  mrg 	{
1.1  mrg 	  /* This can happen in cases like:
1.1  mrg 	     #define loop(x) whatever
1.1  mrg 	     #pragma omp loop
1.1  mrg 	     where when trying to expand loop we need to peek
1.1  mrg 	     next token after loop, but aren't still in_deferred_pragma
1.1  mrg 	     mode but are in in_directive mode, so buffer->need_line
1.1  mrg 	     is set, a CPP_EOF is peeked.  */
1.1  mrg 	  result->type = CPP_PRAGMA_EOL;
1.1  mrg 	  pfile->state.in_deferred_pragma = false;
1.1  mrg 	  if (!pfile->state.pragma_allow_expansion)
1.1  mrg 	    pfile->state.prevent_expansion--;
1.1  mrg 	  return result;
1.1  mrg 	}
1.1  mrg       if (!_cpp_get_fresh_line (pfile))
1.1  mrg 	{
1.1  mrg 	  result->type = CPP_EOF;
1.1  mrg 	  /* Not a real EOF in a directive or arg parsing -- we refuse
1.1  mrg   	     to advance to the next file now, and will once we're out
1.1  mrg   	     of those modes.  */
1.1  mrg 	  if (!pfile->state.in_directive && !pfile->state.parsing_args)
1.1  mrg 	    {
1.1  mrg 	      /* Tell the compiler the line number of the EOF token.  */
1.1  mrg 	      result->src_loc = pfile->line_table->highest_line;
1.1  mrg 	      result->flags = BOL;
1.1  mrg 	      /* Now pop the buffer that _cpp_get_fresh_line did not.  */
1.1  mrg 	      _cpp_pop_buffer (pfile);
1.1  mrg 	    }
1.1  mrg 	  return result;
1.1  mrg 	}
1.1  mrg       if (buffer != pfile->buffer) /* The current buffer changed while
1.1  mrg 				      fetching the line.  */
1.1  mrg 	fallthrough_comment = false;
1.1  mrg       if (!pfile->keep_tokens)
1.1  mrg 	{
1.1  mrg 	  /* Tokens need not be kept: start again at the beginning of
1.1  mrg 	     the base token run.  */
1.1  mrg 	  pfile->cur_run = &pfile->base_run;
1.1  mrg 	  result = pfile->base_run.base;
1.1  mrg 	  pfile->cur_token = result + 1;
1.1  mrg 	}
1.1  mrg       result->flags = BOL;
1.1  mrg       if (pfile->state.parsing_args == 2)
1.1  mrg 	result->flags |= PREV_WHITE;
1.1  mrg     }
1.1  mrg   buffer = pfile->buffer;
1.1  mrg  update_tokens_line: /* Re-entered after a comment that is not saved
1.1  mrg 			as a token.  */
1.1  mrg   result->src_loc = pfile->line_table->highest_line;
1.1  mrg
1.1  mrg  skipped_white: /* Re-entered after a run of whitespace.  */
1.1  mrg   if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1.1  mrg       && !pfile->overlaid_buffer)
1.1  mrg     {
1.1  mrg       _cpp_process_line_notes (pfile, false);
1.1  mrg       result->src_loc = pfile->line_table->highest_line;
1.1  mrg     }
1.1  mrg   c = *buffer->cur++;
1.1  mrg
1.1  mrg   if (pfile->forced_token_location)
1.1  mrg     result->src_loc = pfile->forced_token_location;
1.1  mrg   else
1.1  mrg     result->src_loc = linemap_position_for_column (pfile->line_table,
1.1  mrg 					  CPP_BUF_COLUMN (buffer, buffer->cur));
1.1  mrg
1.1  mrg   /* Dispatch on the first character of the token.  */
1.1  mrg   switch (c)
1.1  mrg     {
1.1  mrg     case ' ': case '\t': case '\f': case '\v': case '\0':
1.1  mrg       result->flags |= PREV_WHITE;
1.1  mrg       skip_whitespace (pfile, c);
1.1  mrg       goto skipped_white;
1.1  mrg
1.1  mrg     case '\n':
1.1  mrg       /* Increment the line, unless this is the last line ...  */
1.1  mrg       if (buffer->cur < buffer->rlimit
1.1  mrg 	  /* ... or this is a #include, (where _cpp_stack_file needs to
1.1  mrg 	     unwind by one line) ...  */
1.1  mrg 	  || (pfile->state.in_directive > 1
1.1  mrg 	      /* ... except traditional-cpp increments this elsewhere.  */
1.1  mrg 	      && !CPP_OPTION (pfile, traditional)))
1.1  mrg 	CPP_INCREMENT_LINE (pfile, 0);
1.1  mrg       buffer->need_line = true;
1.1  mrg       if (pfile->state.in_deferred_pragma)
1.1  mrg 	{
1.1  mrg 	  /* Produce the PRAGMA_EOL on this line.  File reading
1.1  mrg 	     ensures there is always a \n at end of the buffer, thus
1.1  mrg 	     in a deferred pragma we always see CPP_PRAGMA_EOL before
1.1  mrg 	     any CPP_EOF.  */
1.1  mrg 	  result->type = CPP_PRAGMA_EOL;
1.1  mrg 	  result->flags &= ~PREV_WHITE;
1.1  mrg 	  pfile->state.in_deferred_pragma = false;
1.1  mrg 	  if (!pfile->state.pragma_allow_expansion)
1.1  mrg 	    pfile->state.prevent_expansion--;
1.1  mrg 	  return result;
1.1  mrg 	}
1.1  mrg       goto fresh_line;
1.1  mrg
1.1  mrg     case '0': case '1': case '2': case '3': case '4':
1.1  mrg     case '5': case '6': case '7': case '8': case '9':
1.1  mrg       {
1.1  mrg 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1.1  mrg 	result->type = CPP_NUMBER;
1.1  mrg 	lex_number (pfile, &result->val.str, &nst);
1.1  mrg 	warn_about_normalization (pfile, result, &nst);
1.1  mrg 	break;
1.1  mrg       }
1.1  mrg
1.1  mrg     case 'L':
1.1  mrg     case 'u':
1.1  mrg     case 'U':
1.1  mrg     case 'R':
1.1  mrg       /* 'L', 'u', 'U', 'u8' or 'R' may introduce wide characters,
1.1  mrg 	 wide strings or raw strings.  When the following characters
1.1  mrg 	 do not start such a literal, control falls through and the
1.1  mrg 	 letter is lexed as the start of an identifier.  */
1.1  mrg       if (c == 'L' || CPP_OPTION (pfile, rliterals)
1.1  mrg 	  || (c != 'R' && CPP_OPTION (pfile, uliterals)))
1.1  mrg 	{
1.1  mrg 	  if ((*buffer->cur == '\'' && c != 'R')
1.1  mrg 	      || *buffer->cur == '"'
1.1  mrg 	      || (*buffer->cur == 'R'
1.1  mrg 		  && c != 'R'
1.1  mrg 		  && buffer->cur[1] == '"'
1.1  mrg 		  && CPP_OPTION (pfile, rliterals))
1.1  mrg 	      || (*buffer->cur == '8'
1.1  mrg 		  && c == 'u'
1.1  mrg 		  && ((buffer->cur[1] == '"' || (buffer->cur[1] == '\''
1.1  mrg 				&& CPP_OPTION (pfile, utf8_char_literals)))
1.1  mrg 		      || (buffer->cur[1] == 'R' && buffer->cur[2] == '"'
1.1  mrg 			  && CPP_OPTION (pfile, rliterals)))))
1.1  mrg 	    {
1.1  mrg 	      lex_string (pfile, result, buffer->cur - 1);
1.1  mrg 	      break;
1.1  mrg 	    }
1.1  mrg 	}
1.1  mrg       /* Fall through.  */
1.1  mrg
1.1  mrg     case '_':
1.1  mrg     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1.1  mrg     case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1.1  mrg     case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1.1  mrg     case 's': case 't':           case 'v': case 'w': case 'x':
1.1  mrg     case 'y': case 'z':
1.1  mrg     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1.1  mrg     case 'G': case 'H': case 'I': case 'J': case 'K':
1.1  mrg     case 'M': case 'N': case 'O': case 'P': case 'Q':
1.1  mrg     case 'S': case 'T':           case 'V': case 'W': case 'X':
1.1  mrg     case 'Y': case 'Z':
1.1  mrg       result->type = CPP_NAME;
1.1  mrg       {
1.1  mrg 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1.1  mrg 	result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
1.1  mrg 						&nst,
1.1  mrg 						&result->val.node.spelling);
1.1  mrg 	warn_about_normalization (pfile, result, &nst);
1.1  mrg       }
1.1  mrg
1.1  mrg       /* Convert named operators to their proper types.  */
1.1  mrg       if (result->val.node.node->flags & NODE_OPERATOR)
1.1  mrg 	{
1.1  mrg 	  result->flags |= NAMED_OP;
1.1  mrg 	  result->type = (enum cpp_ttype) result->val.node.node->directive_index;
1.1  mrg 	}
1.1  mrg
1.1  mrg       /* Signal FALLTHROUGH comment followed by another token.  */
1.1  mrg       if (fallthrough_comment)
1.1  mrg 	result->flags |= PREV_FALLTHROUGH;
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '\'':
1.1  mrg     case '"':
1.1  mrg       lex_string (pfile, result, buffer->cur - 1);
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '/':
1.1  mrg       /* A potential block or line comment.  */
1.1  mrg       comment_start = buffer->cur;
1.1  mrg       c = *buffer->cur; /* C now holds the character after the '/'.  */
1.1  mrg
1.1  mrg       if (c == '*')
1.1  mrg 	{
1.1  mrg 	  if (_cpp_skip_block_comment (pfile))
1.1  mrg 	    cpp_error (pfile, CPP_DL_ERROR, "unterminated comment");
1.1  mrg 	}
1.1  mrg       else if (c == '/' && ! CPP_OPTION (pfile, traditional))
1.1  mrg 	{
1.1  mrg 	  /* Don't warn for system headers.  */
1.1  mrg 	  if (_cpp_in_system_header (pfile))
1.1  mrg 	    ;
1.1  mrg 	  /* Warn about comments if pedantically GNUC89, and not
1.1  mrg 	     in system headers.  */
1.1  mrg 	  else if (CPP_OPTION (pfile, lang) == CLK_GNUC89
1.1  mrg 		   && CPP_PEDANTIC (pfile)
1.1  mrg 		   && ! buffer->warned_cplusplus_comments)
1.1  mrg 	    {
1.1  mrg 	      if (cpp_error (pfile, CPP_DL_PEDWARN,
1.1  mrg 			     "C++ style comments are not allowed in ISO C90"))
1.1  mrg 		cpp_error (pfile, CPP_DL_NOTE,
1.1  mrg 			   "(this will be reported only once per input file)");
1.1  mrg 	      buffer->warned_cplusplus_comments = 1;
1.1  mrg 	    }
1.1  mrg 	  /* Or if specifically desired via -Wc90-c99-compat.  */
1.1  mrg 	  else if (CPP_OPTION (pfile, cpp_warn_c90_c99_compat) > 0
1.1  mrg 		   && ! CPP_OPTION (pfile, cplusplus)
1.1  mrg 		   && ! buffer->warned_cplusplus_comments)
1.1  mrg 	    {
1.1  mrg 	      if (cpp_error (pfile, CPP_DL_WARNING,
1.1  mrg 			     "C++ style comments are incompatible with C90"))
1.1  mrg 		cpp_error (pfile, CPP_DL_NOTE,
1.1  mrg 			   "(this will be reported only once per input file)");
1.1  mrg 	      buffer->warned_cplusplus_comments = 1;
1.1  mrg 	    }
1.1  mrg 	  /* In C89/C94, C++ style comments are forbidden.  */
1.1  mrg 	  else if ((CPP_OPTION (pfile, lang) == CLK_STDC89
1.1  mrg 		    || CPP_OPTION (pfile, lang) == CLK_STDC94))
1.1  mrg 	    {
1.1  mrg 	      /* But don't be confused about valid code such as
1.1  mrg 	         - // immediately followed by *,
1.1  mrg 		 - // in a preprocessing directive,
1.1  mrg 		 - // in an #if 0 block.
1.1  mrg 		 In those cases only the first '/' is consumed and it
1.1  mrg 		 is returned as a CPP_DIV token.  */
1.1  mrg 	      if (buffer->cur[1] == '*'
1.1  mrg 		  || pfile->state.in_directive
1.1  mrg 		  || pfile->state.skipping)
1.1  mrg 		{
1.1  mrg 		  result->type = CPP_DIV;
1.1  mrg 		  break;
1.1  mrg 		}
1.1  mrg 	      else if (! buffer->warned_cplusplus_comments)
1.1  mrg 		{
1.1  mrg 		  if (cpp_error (pfile, CPP_DL_ERROR,
1.1  mrg 				 "C++ style comments are not allowed in "
1.1  mrg 				 "ISO C90"))
1.1  mrg 		    cpp_error (pfile, CPP_DL_NOTE,
1.1  mrg 			       "(this will be reported only once per input "
1.1  mrg 			       "file)");
1.1  mrg 		  buffer->warned_cplusplus_comments = 1;
1.1  mrg 		}
1.1  mrg 	    }
1.1  mrg 	  if (skip_line_comment (pfile) && CPP_OPTION (pfile, warn_comments))
1.1  mrg 	    cpp_warning (pfile, CPP_W_COMMENTS, "multi-line comment");
1.1  mrg 	}
1.1  mrg       else if (c == '=')
1.1  mrg 	{
1.1  mrg 	  buffer->cur++;
1.1  mrg 	  result->type = CPP_DIV_EQ;
1.1  mrg 	  break;
1.1  mrg 	}
1.1  mrg       else
1.1  mrg 	{
1.1  mrg 	  result->type = CPP_DIV;
1.1  mrg 	  break;
1.1  mrg 	}
1.1  mrg
1.1  mrg       /* Only reached when a block or line comment was skipped above;
1.1  mrg 	 every other path has already left the switch.  */
1.1  mrg       if (fallthrough_comment_p (pfile, comment_start))
1.1  mrg 	fallthrough_comment = true;
1.1  mrg
1.1  mrg       if (pfile->cb.comment)
1.1  mrg 	{
1.1  mrg 	  /* COMMENT_START points just past the leading '/', hence the
1.1  mrg 	     adjustments by one below.  */
1.1  mrg 	  size_t len = pfile->buffer->cur - comment_start;
1.1  mrg 	  pfile->cb.comment (pfile, result->src_loc, comment_start - 1,
1.1  mrg 			     len + 1);
1.1  mrg 	}
1.1  mrg
1.1  mrg       if (!pfile->state.save_comments)
1.1  mrg 	{
1.1  mrg 	  /* The comment is dropped; it counts as whitespace before the
1.1  mrg 	     token lexed next.  */
1.1  mrg 	  result->flags |= PREV_WHITE;
1.1  mrg 	  goto update_tokens_line;
1.1  mrg 	}
1.1  mrg
1.1  mrg       if (fallthrough_comment)
1.1  mrg 	result->flags |= PREV_FALLTHROUGH;
1.1  mrg
1.1  mrg       /* Save the comment as a token in its own right.  */
1.1  mrg       save_comment (pfile, result, comment_start, c);
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '<':
1.1  mrg       if (pfile->state.angled_headers)
1.1  mrg 	{
1.1  mrg 	  /* Try to lex an angle-bracketed header name; if lex_string
1.1  mrg 	     leaves the type as CPP_LESS, carry on as for an ordinary
1.1  mrg 	     '<'.  */
1.1  mrg 	  lex_string (pfile, result, buffer->cur - 1);
1.1  mrg 	  if (result->type != CPP_LESS)
1.1  mrg 	    break;
1.1  mrg 	}
1.1  mrg
1.1  mrg       result->type = CPP_LESS;
1.1  mrg       if (*buffer->cur == '=')
1.1  mrg 	{
1.1  mrg 	  buffer->cur++, result->type = CPP_LESS_EQ;
1.1  mrg 	  if (*buffer->cur == '>'
1.1  mrg 	      && CPP_OPTION (pfile, cplusplus)
1.1  mrg 	      && CPP_OPTION (pfile, lang) >= CLK_GNUCXX20)
1.1  mrg 	    buffer->cur++, result->type = CPP_SPACESHIP;
1.1  mrg 	}
1.1  mrg       else if (*buffer->cur == '<')
1.1  mrg 	{
1.1  mrg 	  buffer->cur++;
1.1  mrg 	  IF_NEXT_IS ('=', CPP_LSHIFT_EQ, CPP_LSHIFT);
1.1  mrg 	}
1.1  mrg       else if (CPP_OPTION (pfile, digraphs))
1.1  mrg 	{
1.1  mrg 	  if (*buffer->cur == ':')
1.1  mrg 	    {
1.1  mrg 	      /* C++11 [2.5/3 lex.pptoken], "Otherwise, if the next
1.1  mrg 		 three characters are <:: and the subsequent character
1.1  mrg 		 is neither : nor >, the < is treated as a preprocessor
1.1  mrg 		 token by itself".  */
1.1  mrg 	      if (CPP_OPTION (pfile, cplusplus)
1.1  mrg 		  && CPP_OPTION (pfile, lang) != CLK_CXX98
1.1  mrg 		  && CPP_OPTION (pfile, lang) != CLK_GNUCXX
1.1  mrg 		  && buffer->cur[1] == ':'
1.1  mrg 		  && buffer->cur[2] != ':' && buffer->cur[2] != '>')
1.1  mrg 		break;
1.1  mrg
1.1  mrg 	      buffer->cur++;
1.1  mrg 	      result->flags |= DIGRAPH;
1.1  mrg 	      result->type = CPP_OPEN_SQUARE;
1.1  mrg 	    }
1.1  mrg 	  else if (*buffer->cur == '%')
1.1  mrg 	    {
1.1  mrg 	      buffer->cur++;
1.1  mrg 	      result->flags |= DIGRAPH;
1.1  mrg 	      result->type = CPP_OPEN_BRACE;
1.1  mrg 	    }
1.1  mrg 	}
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '>':
1.1  mrg       result->type = CPP_GREATER;
1.1  mrg       if (*buffer->cur == '=')
1.1  mrg 	buffer->cur++, result->type = CPP_GREATER_EQ;
1.1  mrg       else if (*buffer->cur == '>')
1.1  mrg 	{
1.1  mrg 	  buffer->cur++;
1.1  mrg 	  IF_NEXT_IS ('=', CPP_RSHIFT_EQ, CPP_RSHIFT);
1.1  mrg 	}
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '%':
1.1  mrg       result->type = CPP_MOD;
1.1  mrg       if (*buffer->cur == '=')
1.1  mrg 	buffer->cur++, result->type = CPP_MOD_EQ;
1.1  mrg       else if (CPP_OPTION (pfile, digraphs))
1.1  mrg 	{
1.1  mrg 	  if (*buffer->cur == ':')
1.1  mrg 	    {
1.1  mrg 	      /* "%:" is the digraph for '#', and "%:%:" for "##".  */
1.1  mrg 	      buffer->cur++;
1.1  mrg 	      result->flags |= DIGRAPH;
1.1  mrg 	      result->type = CPP_HASH;
1.1  mrg 	      if (*buffer->cur == '%' && buffer->cur[1] == ':')
1.1  mrg 		buffer->cur += 2, result->type = CPP_PASTE, result->val.token_no = 0;
1.1  mrg 	    }
1.1  mrg 	  else if (*buffer->cur == '>')
1.1  mrg 	    {
1.1  mrg 	      buffer->cur++;
1.1  mrg 	      result->flags |= DIGRAPH;
1.1  mrg 	      result->type = CPP_CLOSE_BRACE;
1.1  mrg 	    }
1.1  mrg 	}
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '.':
1.1  mrg       result->type = CPP_DOT;
1.1  mrg       if (ISDIGIT (*buffer->cur))
1.1  mrg 	{
1.1  mrg 	  struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1.1  mrg 	  result->type = CPP_NUMBER;
1.1  mrg 	  lex_number (pfile, &result->val.str, &nst);
1.1  mrg 	  warn_about_normalization (pfile, result, &nst);
1.1  mrg 	}
1.1  mrg       else if (*buffer->cur == '.' && buffer->cur[1] == '.')
1.1  mrg 	buffer->cur += 2, result->type = CPP_ELLIPSIS;
1.1  mrg       else if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1.1  mrg 	buffer->cur++, result->type = CPP_DOT_STAR;
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '+':
1.1  mrg       result->type = CPP_PLUS;
1.1  mrg       if (*buffer->cur == '+')
1.1  mrg 	buffer->cur++, result->type = CPP_PLUS_PLUS;
1.1  mrg       else if (*buffer->cur == '=')
1.1  mrg 	buffer->cur++, result->type = CPP_PLUS_EQ;
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '-':
1.1  mrg       result->type = CPP_MINUS;
1.1  mrg       if (*buffer->cur == '>')
1.1  mrg 	{
1.1  mrg 	  buffer->cur++;
1.1  mrg 	  result->type = CPP_DEREF;
1.1  mrg 	  if (*buffer->cur == '*' && CPP_OPTION (pfile, cplusplus))
1.1  mrg 	    buffer->cur++, result->type = CPP_DEREF_STAR;
1.1  mrg 	}
1.1  mrg       else if (*buffer->cur == '-')
1.1  mrg 	buffer->cur++, result->type = CPP_MINUS_MINUS;
1.1  mrg       else if (*buffer->cur == '=')
1.1  mrg 	buffer->cur++, result->type = CPP_MINUS_EQ;
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '&':
1.1  mrg       result->type = CPP_AND;
1.1  mrg       if (*buffer->cur == '&')
1.1  mrg 	buffer->cur++, result->type = CPP_AND_AND;
1.1  mrg       else if (*buffer->cur == '=')
1.1  mrg 	buffer->cur++, result->type = CPP_AND_EQ;
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '|':
1.1  mrg       result->type = CPP_OR;
1.1  mrg       if (*buffer->cur == '|')
1.1  mrg 	buffer->cur++, result->type = CPP_OR_OR;
1.1  mrg       else if (*buffer->cur == '=')
1.1  mrg 	buffer->cur++, result->type = CPP_OR_EQ;
1.1  mrg       break;
1.1  mrg
1.1  mrg     case ':':
1.3  mrg       result->type = CPP_COLON;
1.3  mrg       if (*buffer->cur == ':')
1.3  mrg 	{
1.3  mrg 	  /* With the scope option "::" is a single CPP_SCOPE token;
1.3  mrg 	     without it the first ':' is returned as CPP_COLON carrying
1.3  mrg 	     the COLON_SCOPE flag and the second ':' is left unread.  */
1.3  mrg 	  if (CPP_OPTION (pfile, scope))
1.3  mrg 	    buffer->cur++, result->type = CPP_SCOPE;
1.3  mrg 	  else
1.3  mrg 	    result->flags |= COLON_SCOPE;
1.1  mrg 	}
1.1  mrg       else if (*buffer->cur == '>' && CPP_OPTION (pfile, digraphs))
1.1  mrg 	{
1.1  mrg 	  buffer->cur++;
1.1  mrg 	  result->flags |= DIGRAPH;
1.1  mrg 	  result->type = CPP_CLOSE_SQUARE;
1.1  mrg 	}
1.1  mrg       break;
1.1  mrg
1.1  mrg     case '*': IF_NEXT_IS ('=', CPP_MULT_EQ, CPP_MULT); break;
1.1  mrg     case '=': IF_NEXT_IS ('=', CPP_EQ_EQ, CPP_EQ); break;
1.1  mrg     case '!': IF_NEXT_IS ('=', CPP_NOT_EQ, CPP_NOT); break;
1.1  mrg     case '^': IF_NEXT_IS ('=', CPP_XOR_EQ, CPP_XOR); break;
1.1  mrg     case '#': IF_NEXT_IS ('#', CPP_PASTE, CPP_HASH); result->val.token_no = 0; break;
1.1  mrg
1.1  mrg     case '?': result->type = CPP_QUERY; break;
1.1  mrg     case '~': result->type = CPP_COMPL; break;
1.1  mrg     case ',': result->type = CPP_COMMA; break;
1.1  mrg     case '(': result->type = CPP_OPEN_PAREN; break;
1.1  mrg     case ')': result->type = CPP_CLOSE_PAREN; break;
1.1  mrg     case '[': result->type = CPP_OPEN_SQUARE; break;
1.1  mrg     case ']': result->type = CPP_CLOSE_SQUARE; break;
1.1  mrg     case '{': result->type = CPP_OPEN_BRACE; break;
1.1  mrg     case '}': result->type = CPP_CLOSE_BRACE; break;
1.1  mrg     case ';': result->type = CPP_SEMICOLON; break;
1.1  mrg
1.1  mrg       /* @ is a punctuator in Objective-C.  */
1.1  mrg     case '@': result->type = CPP_ATSIGN; break;
1.1  mrg
1.1  mrg     default:
1.1  mrg       {
1.1  mrg 	/* Step back so that BASE points at the character that was
1.1  mrg 	   read into C.  */
1.1  mrg 	const uchar *base = --buffer->cur;
1.1  mrg
1.1  mrg 	/* Check for an extended identifier ($ or UCN or UTF-8).  */
1.1  mrg 	struct normalize_state nst = INITIAL_NORMALIZE_STATE;
1.1  mrg 	if (forms_identifier_p (pfile, true, &nst))
1.1  mrg 	  {
1.1  mrg 	    result->type = CPP_NAME;
1.1  mrg 	    result->val.node.node = lex_identifier (pfile, base, true, &nst,
1.1  mrg 						    &result->val.node.spelling);
1.1  mrg 	    warn_about_normalization (pfile, result, &nst);
1.1  mrg 	    break;
1.1  mrg 	  }
1.1  mrg
1.1  mrg 	/* Otherwise this will form a CPP_OTHER token.  Parse valid UTF-8 as a
1.1  mrg 	   single token.  If the bytes are not valid UTF-8, the token
1.1  mrg 	   consists of the single byte at BASE.  */
1.1  mrg 	buffer->cur++;
1.1  mrg 	if (c >= utf8_signifier)
1.1  mrg 	  {
1.1  mrg 	    const uchar *pstr = base;
1.1  mrg 	    cppchar_t s;
1.1  mrg 	    if (_cpp_valid_utf8 (pfile, &pstr, buffer->rlimit, 0, NULL, &s))
1.1  mrg 	      buffer->cur = pstr;
1.1  mrg 	  }
1.1  mrg 	create_literal (pfile, result, base, buffer->cur - base, CPP_OTHER);
1.1  mrg 	break;
1.1  mrg       }
1.1  mrg
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Potentially convert the location of the token to a range.  */
1.1  mrg   if (result->src_loc >= RESERVED_LOCATION_COUNT
1.1  mrg       && result->type != CPP_EOF)
1.1  mrg     {
1.1  mrg       /* Ensure that any line notes are processed, so that we have the
1.1  mrg 	 correct physical line/column for the end-point of the token even
1.1  mrg 	 when a logical line is split via one or more backslashes.  */
1.1  mrg       if (buffer->cur >= buffer->notes[buffer->cur_note].pos
1.1  mrg 	  && !pfile->overlaid_buffer)
1.1  mrg 	_cpp_process_line_notes (pfile, false);
1.1  mrg
1.1  mrg       /* The range starts at the token's location and finishes at the
1.1  mrg 	 column of the current buffer position.  */
1.1  mrg       source_range tok_range;
1.1  mrg       tok_range.m_start = result->src_loc;
1.1  mrg       tok_range.m_finish
1.1  mrg 	= linemap_position_for_column (pfile->line_table,
1.1  mrg 				       CPP_BUF_COLUMN (buffer, buffer->cur));
1.1  mrg
1.1  mrg       result->src_loc = COMBINE_LOCATION_DATA (pfile->line_table,
1.1  mrg 					       result->src_loc,
1.1  mrg 					       tok_range, NULL);
1.1  mrg     }
1.1  mrg
1.1  mrg   return result;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return an upper bound on the number of bytes needed to spell
1.1  mrg    TOKEN, not counting any preceding whitespace.  Literals need
1.1  mrg    exactly their stored length; identifiers are budgeted ten bytes
1.1  mrg    per byte of their name; every other token is budgeted six
1.1  mrg    bytes.  */
1.1  mrg unsigned int
1.1  mrg cpp_token_len (const cpp_token *token)
1.1  mrg {
1.1  mrg   const enum spell_type category = TOKEN_SPELL (token);
1.1  mrg
1.1  mrg   if (category == SPELL_LITERAL)
1.1  mrg     return token->val.str.len;
1.1  mrg
1.1  mrg   if (category == SPELL_IDENT)
1.1  mrg     return NODE_LEN (token->val.node.node) * 10;
1.1  mrg
1.1  mrg   return 6;
1.1  mrg }
1.1  mrg
1.1  mrg /* Decode the UTF-8 sequence starting at NAME and write it to BUFFER
1.1  mrg    as a \U escape followed by eight lower-case hexadecimal digits.
1.1  mrg    Exactly 10 bytes are written to BUFFER and no terminating NUL is
1.1  mrg    added.  Returns the number of bytes read from NAME, taken from
1.1  mrg    the count of leading one bits in the first byte.  Calls abort ()
1.1  mrg    if a trailing byte is not of the form 10xxxxxx.  */
1.1  mrg
1.1  mrg static size_t
1.1  mrg utf8_to_ucn (unsigned char *buffer, const unsigned char *name)
1.1  mrg {
1.1  mrg   static const char hex_digits[] = "0123456789abcdef";
1.1  mrg   int seq_len = 0;
1.1  mrg   unsigned long code_point;
1.1  mrg
1.1  mrg   /* The number of leading one bits in the first byte gives the
1.1  mrg      length of the sequence.  */
1.1  mrg   for (unsigned mask = 0x80; *name & mask; mask >>= 1)
1.1  mrg     seq_len++;
1.1  mrg
1.1  mrg   /* The payload bits of the lead byte are those below the length
1.1  mrg      marker.  */
1.1  mrg   code_point = *name & (0x7F >> seq_len);
1.1  mrg
1.1  mrg   /* Each trailing byte contributes six more bits.  */
1.1  mrg   for (int k = 1; k < seq_len; k++)
1.1  mrg     {
1.1  mrg       const unsigned char trail = name[k];
1.1  mrg
1.1  mrg       code_point = (code_point << 6) | (trail & 0x3F);
1.1  mrg
1.1  mrg       /* Ill-formed UTF-8.  */
1.1  mrg       if ((trail & 0xC0) != 0x80)
1.1  mrg 	abort ();
1.1  mrg     }
1.1  mrg
1.1  mrg   buffer[0] = '\\';
1.1  mrg   buffer[1] = 'U';
1.1  mrg   buffer += 2;
1.1  mrg   for (int shift = 28; shift >= 0; shift -= 4)
1.1  mrg     *buffer++ = hex_digits[(code_point >> shift) & 0xF];
1.1  mrg
1.1  mrg   return seq_len;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return a pointer to the spelling of the digraph whose token type
1.1  mrg    is TYPE.  The spelling is looked up in digraph_spellings at TYPE's
1.1  mrg    offset from CPP_FIRST_DIGRAPH; no range check is made.  */
1.1  mrg static const unsigned char *
1.1  mrg cpp_digraph2name (enum cpp_ttype type)
1.1  mrg {
1.1  mrg   const int offset = (int) type - (int) CPP_FIRST_DIGRAPH;
1.1  mrg
1.1  mrg   return digraph_spellings[offset];
1.1  mrg }
1.1  mrg
1.1  mrg /* Write the spelling of identifier IDENT to BUFFER.  Bytes without
1.1  mrg    the high bit set are copied unchanged; each UTF-8 sequence is
1.1  mrg    replaced by a 10-byte UCN produced by utf8_to_ucn.  BUFFER must
1.1  mrg    already be large enough to hold the result.  Returns a pointer to
1.1  mrg    the character after the last one written.  */
1.1  mrg unsigned char *
1.1  mrg _cpp_spell_ident_ucns (unsigned char *buffer, cpp_hashnode *ident)
1.1  mrg {
1.1  mrg   const unsigned char *name = NODE_NAME (ident);
1.1  mrg   size_t pos = 0;
1.1  mrg
1.1  mrg   while (pos < NODE_LEN (ident))
1.1  mrg     {
1.1  mrg       if ((name[pos] & 0x80) == 0)
1.1  mrg 	{
1.1  mrg 	  /* Plain ASCII byte.  */
1.1  mrg 	  *buffer++ = name[pos++];
1.1  mrg 	  continue;
1.1  mrg 	}
1.1  mrg
1.1  mrg       /* Start of a UTF-8 sequence: emit a UCN and step over the
1.1  mrg 	 bytes it consumed.  */
1.1  mrg       pos += utf8_to_ucn (buffer, name + pos);
1.1  mrg       buffer += 10;
1.1  mrg     }
1.1  mrg
1.1  mrg   return buffer;
1.1  mrg }
1.1  mrg
1.1  mrg /* Write the spelling of TOKEN to BUFFER, which must already be large
1.1  mrg    enough to hold it.  Returns a pointer to the character after the
1.1  mrg    last character written; no terminating NUL is added.
1.1  mrg
1.1  mrg    FORSTRING is true if this is to be the spelling after translation
1.1  mrg    phase 1 (with the original spelling of extended identifiers), false
1.1  mrg    if extended identifiers should always be written using UCNs (there
1.1  mrg    is no option for always writing them in the internal UTF-8 form).
1.1  mrg
1.1  mrg    A token that has no spelling is reported through cpp_error as an
1.1  mrg    internal error and BUFFER is returned unchanged.
1.1  mrg    FIXME: Would be nice if we didn't need the PFILE argument.  */
1.1  mrg unsigned char *
1.1  mrg cpp_spell_token (cpp_reader *pfile, const cpp_token *token,
1.1  mrg 		 unsigned char *buffer, bool forstring)
1.1  mrg {
1.1  mrg   enum spell_type how = TOKEN_SPELL (token);
1.1  mrg
1.1  mrg   /* A named operator is spelled like an identifier, unless it is
1.1  mrg      flagged as a digraph, which takes precedence.  */
1.1  mrg   if (how == SPELL_OPERATOR
1.1  mrg       && !(token->flags & DIGRAPH)
1.1  mrg       && (token->flags & NAMED_OP))
1.1  mrg     how = SPELL_IDENT;
1.1  mrg
1.1  mrg   switch (how)
1.1  mrg     {
1.1  mrg     case SPELL_OPERATOR:
1.1  mrg       {
1.1  mrg 	const unsigned char *text = ((token->flags & DIGRAPH)
1.1  mrg 				     ? cpp_digraph2name (token->type)
1.1  mrg 				     : TOKEN_NAME (token));
1.1  mrg
1.1  mrg 	for (; *text != '\0'; text++)
1.1  mrg 	  *buffer++ = *text;
1.1  mrg       }
1.1  mrg       break;
1.1  mrg
1.1  mrg     case SPELL_IDENT:
1.1  mrg       if (!forstring)
1.1  mrg 	buffer = _cpp_spell_ident_ucns (buffer, token->val.node.node);
1.1  mrg       else
1.1  mrg 	{
1.1  mrg 	  /* Copy the identifier exactly as it was originally spelled.  */
1.1  mrg 	  const size_t n = NODE_LEN (token->val.node.spelling);
1.1  mrg
1.1  mrg 	  memcpy (buffer, NODE_NAME (token->val.node.spelling), n);
1.1  mrg 	  buffer += n;
1.1  mrg 	}
1.1  mrg       break;
1.1  mrg
1.1  mrg     case SPELL_LITERAL:
1.1  mrg       {
1.1  mrg 	const size_t n = token->val.str.len;
1.1  mrg
1.1  mrg 	memcpy (buffer, token->val.str.text, n);
1.1  mrg 	buffer += n;
1.1  mrg       }
1.1  mrg       break;
1.1  mrg
1.1  mrg     case SPELL_NONE:
1.1  mrg       cpp_error (pfile, CPP_DL_ICE,
1.1  mrg 		 "unspellable token %s", TOKEN_NAME (token));
1.1  mrg       break;
1.1  mrg     }
1.1  mrg
1.1  mrg   return buffer;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return TOKEN spelt as a null-terminated string, with extended
1.1  mrg    identifiers written as UCNs.  The storage comes from
1.1  mrg    _cpp_unaligned_alloc and is freed when the reader is destroyed.
1.1  mrg    Useful for diagnostics.  */
1.1  mrg unsigned char *
1.1  mrg cpp_token_as_text (cpp_reader *pfile, const cpp_token *token)
1.1  mrg {
1.1  mrg   /* One extra byte for the terminating NUL.  */
1.1  mrg   const unsigned int size = cpp_token_len (token) + 1;
1.1  mrg   unsigned char *text = _cpp_unaligned_alloc (pfile, size);
1.1  mrg   unsigned char *tail = cpp_spell_token (pfile, token, text, false);
1.1  mrg
1.1  mrg   *tail = '\0';
1.1  mrg   return text;
1.1  mrg }
1.1  mrg
1.1  mrg /* Return a pointer to a string which spells the token defined by
1.1  mrg    TYPE and FLAGS.  DIGRAPH in FLAGS selects the digraph spelling;
1.1  mrg    failing that, NAMED_OP selects the named-operator spelling;
1.1  mrg    otherwise the entry for TYPE in token_spellings is used.  Used by
1.1  mrg    C front ends, which really should move to using
1.1  mrg    cpp_token_as_text.  */
1.1  mrg const char *
1.1  mrg cpp_type2name (enum cpp_ttype type, unsigned char flags)
1.1  mrg {
1.1  mrg   const unsigned char *spelling;
1.1  mrg
1.1  mrg   if (!(flags & DIGRAPH) && (flags & NAMED_OP))
1.1  mrg     return cpp_named_operator2name (type);
1.1  mrg
1.1  mrg   spelling = ((flags & DIGRAPH)
1.1  mrg 	      ? cpp_digraph2name (type)
1.1  mrg 	      : token_spellings[type].name);
1.1  mrg   return (const char *) spelling;
1.1  mrg }
1.1  mrg
1.1  mrg /* Write the spelling of TOKEN to FP, without any preceding space.
1.1  mrg    Identifiers (and named operators) have their UTF-8 sequences
1.1  mrg    written as UCNs, and CPP_HEADER_NAME tokens are wrapped in double
1.1  mrg    quotes.  Tokens with no spelling produce no output.  Kept separate
1.1  mrg    from cpp_spell_token for efficiency - to avoid stdio
1.1  mrg    double-buffering.  */
1.1  mrg void
1.1  mrg cpp_output_token (const cpp_token *token, FILE *fp)
1.1  mrg {
1.1  mrg   enum spell_type how = TOKEN_SPELL (token);
1.1  mrg
1.1  mrg   /* A named operator is written like an identifier, unless it is
1.1  mrg      flagged as a digraph, which takes precedence.  */
1.1  mrg   if (how == SPELL_OPERATOR
1.1  mrg       && !(token->flags & DIGRAPH)
1.1  mrg       && (token->flags & NAMED_OP))
1.1  mrg     how = SPELL_IDENT;
1.1  mrg
1.1  mrg   switch (how)
1.1  mrg     {
1.1  mrg     case SPELL_OPERATOR:
1.1  mrg       {
1.1  mrg 	const unsigned char *text = ((token->flags & DIGRAPH)
1.1  mrg 				     ? cpp_digraph2name (token->type)
1.1  mrg 				     : TOKEN_NAME (token));
1.1  mrg
1.1  mrg 	/* The first character is written unconditionally.  */
1.1  mrg 	do
1.1  mrg 	  putc (*text, fp);
1.1  mrg 	while (*++text != '\0');
1.1  mrg       }
1.1  mrg       break;
1.1  mrg
1.1  mrg     case SPELL_IDENT:
1.1  mrg       {
1.1  mrg 	const unsigned char *name = NODE_NAME (token->val.node.node);
1.1  mrg 	size_t pos = 0;
1.1  mrg
1.1  mrg 	while (pos < NODE_LEN (token->val.node.node))
1.1  mrg 	  {
1.1  mrg 	    if ((name[pos] & 0x80) == 0)
1.1  mrg 	      {
1.1  mrg 		fputc (name[pos], fp);
1.1  mrg 		pos++;
1.1  mrg 		continue;
1.1  mrg 	      }
1.1  mrg
1.1  mrg 	    /* Convert one UTF-8 sequence into a 10-byte UCN.  */
1.1  mrg 	    unsigned char ucn[10];
1.1  mrg 	    pos += utf8_to_ucn (ucn, name + pos);
1.1  mrg 	    fwrite (ucn, 1, 10, fp);
1.1  mrg 	  }
1.1  mrg       }
1.1  mrg       break;
1.1  mrg
1.1  mrg     case SPELL_LITERAL:
1.1  mrg       {
1.1  mrg 	const bool quoted = token->type == CPP_HEADER_NAME;
1.1  mrg
1.1  mrg 	if (quoted)
1.1  mrg 	  fputc ('"', fp);
1.1  mrg 	fwrite (token->val.str.text, 1, token->val.str.len, fp);
1.1  mrg 	if (quoted)
1.1  mrg 	  fputc ('"', fp);
1.1  mrg       }
1.1  mrg       break;
1.1  mrg
1.1  mrg     case SPELL_NONE:
1.1  mrg       /* An error, most probably.  */
1.1  mrg       break;
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Compare tokens A and B.  The result is nonzero when both have the
1.1  mrg    same type and flags and, in addition, the value field that
1.1  mrg    matters for their spelling category agrees; it is zero
1.1  mrg    otherwise.  */
1.1  mrg int
1.1  mrg _cpp_equiv_tokens (const cpp_token *a, const cpp_token *b)
1.1  mrg {
1.1  mrg   /* Differing type or flags settles the matter immediately.  */
1.1  mrg   if (a->type != b->type || a->flags != b->flags)
1.1  mrg     return 0;
1.1  mrg
1.1  mrg   switch (TOKEN_SPELL (a))
1.1  mrg     {
1.1  mrg     case SPELL_IDENT:
1.1  mrg       /* Same node and same recorded spelling.  */
1.1  mrg       if (a->val.node.node != b->val.node.node)
1.1  mrg         return 0;
1.1  mrg       return a->val.node.spelling == b->val.node.spelling;
1.1  mrg
1.1  mrg     case SPELL_LITERAL:
1.1  mrg       /* Same length and identical bytes.  */
1.1  mrg       if (a->val.str.len != b->val.str.len)
1.1  mrg         return 0;
1.1  mrg       return !memcmp (a->val.str.text, b->val.str.text, a->val.str.len);
1.1  mrg
1.1  mrg     case SPELL_NONE:
1.1  mrg       /* Only macro arguments carry a value that needs comparing.  */
1.1  mrg       if (a->type != CPP_MACRO_ARG)
1.1  mrg         return 1;
1.1  mrg       return (a->val.macro_arg.arg_no == b->val.macro_arg.arg_no
1.1  mrg               && a->val.macro_arg.spelling == b->val.macro_arg.spelling);
1.1  mrg
1.1  mrg     case SPELL_OPERATOR:
1.1  mrg     default:			/* Keep compiler happy.  */
1.1  mrg       /* token_no is used to track where multiple consecutive ##
1.1  mrg          tokens were originally located, so it takes part in the
1.1  mrg          comparison for CPP_PASTE only.  */
1.1  mrg       if (a->type != CPP_PASTE)
1.1  mrg         return 1;
1.1  mrg       return a->val.token_no == b->val.token_no;
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Returns nonzero if a space should be inserted to avoid an
1.1  mrg    accidental token paste for output.  For simplicity, it is
1.1  mrg    conservative, and occasionally advises a space where one is not
1.1  mrg    needed, e.g. "." and ".2".
1.1  mrg
1.1  mrg    TOKEN1 is the earlier token and TOKEN2 the one that follows it.
1.1  mrg    A token with NAMED_OP set is treated as CPP_NAME on either side.
1.1  mrg    C is the first character of TOKEN2's spelling when TOKEN2 is a
1.1  mrg    digraph or belongs to the SPELL_OPERATOR category, and EOF
1.1  mrg    otherwise; most entries of the table below test TOKEN1's type
1.1  mrg    against that character, the rest test TOKEN2's type directly.
1.1  mrg    PFILE is consulted for the objc and user_literals options and is
1.1  mrg    handed to name_p.  */
1.1  mrg int
1.1  mrg cpp_avoid_paste (cpp_reader *pfile, const cpp_token *token1,
1.1  mrg 		 const cpp_token *token2)
1.1  mrg {
1.1  mrg   enum cpp_ttype a = token1->type, b = token2->type;
1.1  mrg   cppchar_t c;
1.1  mrg
1.1  mrg   if (token1->flags & NAMED_OP)
1.1  mrg     a = CPP_NAME;
1.1  mrg   if (token2->flags & NAMED_OP)
1.1  mrg     b = CPP_NAME;
1.1  mrg
1.1  mrg   c = EOF; /* Stays EOF unless TOKEN2 is a digraph or an operator.  */
1.1  mrg   if (token2->flags & DIGRAPH)
1.1  mrg     c = digraph_spellings[(int) b - (int) CPP_FIRST_DIGRAPH][0];
1.1  mrg   else if (token_spellings[b].category == SPELL_OPERATOR)
1.1  mrg     c = token_spellings[b].name[0];
1.1  mrg
1.1  mrg   /* Quickly get everything that can paste with an '='.  */
1.1  mrg   if ((int) a <= (int) CPP_LAST_EQ && c == '=')
1.1  mrg     return 1;
1.1  mrg
1.1  mrg   switch (a)
1.1  mrg     {
1.1  mrg     case CPP_GREATER:	return c == '>';
1.1  mrg     case CPP_LESS:	return c == '<' || c == '%' || c == ':';
1.1  mrg     case CPP_PLUS:	return c == '+';
1.1  mrg     case CPP_MINUS:	return c == '-' || c == '>';
1.1  mrg     case CPP_DIV:	return c == '/' || c == '*'; /* Comments.  */
1.1  mrg     case CPP_MOD:	return c == ':' || c == '>';
1.1  mrg     case CPP_AND:	return c == '&';
1.1  mrg     case CPP_OR:	return c == '|';
1.1  mrg     case CPP_COLON:	return c == ':' || c == '>';
1.1  mrg     case CPP_DEREF:	return c == '*';
1.1  mrg     case CPP_DOT:	return c == '.' || c == '%' || b == CPP_NUMBER;
1.1  mrg     case CPP_HASH:	return c == '#' || c == '%'; /* Digraph form.  */
1.1  mrg     case CPP_PRAGMA:
1.1  mrg     case CPP_NAME:	return ((b == CPP_NUMBER
1.1  mrg 				 && name_p (pfile, &token2->val.str))
1.1  mrg 				|| b == CPP_NAME
1.1  mrg 				|| b == CPP_CHAR || b == CPP_STRING); /* L */
1.1  mrg     case CPP_NUMBER:	return (b == CPP_NUMBER || b == CPP_NAME
1.1  mrg 				|| b == CPP_CHAR
1.1  mrg 				|| c == '.' || c == '+' || c == '-');
1.1  mrg 				      /* UCNs */
1.1  mrg     case CPP_OTHER:	return ((token1->val.str.text[0] == '\\'
1.1  mrg 				 && b == CPP_NAME)
1.1  mrg 				|| (CPP_OPTION (pfile, objc)
1.1  mrg 				    && token1->val.str.text[0] == '@'
1.1  mrg 				    && (b == CPP_NAME || b == CPP_STRING)));
1.1  mrg     case CPP_LESS_EQ:	return c == '>';
1.1  mrg     case CPP_STRING:
1.1  mrg     case CPP_WSTRING:
1.1  mrg     case CPP_UTF8STRING:
1.1  mrg     case CPP_STRING16:
1.1  mrg     case CPP_STRING32:	return (CPP_OPTION (pfile, user_literals)
1.1  mrg 				&& (b == CPP_NAME
1.1  mrg 				    || (TOKEN_SPELL (token2) == SPELL_LITERAL
1.1  mrg 					&& ISIDST (token2->val.str.text[0]))));
1.1  mrg
1.1  mrg     default:		break;
1.1  mrg     }
1.1  mrg
1.1  mrg   return 0; /* No entry above asked for a space.  */
1.1  mrg }
1.1  mrg
1.1  mrg /* Write the tokens that remain on the current line to FP, then a
1.1  mrg    newline.  Tokens are fetched with cpp_get_token until CPP_EOF.  A
1.1  mrg    single space is written ahead of every token but the first whose
1.1  mrg    PREV_WHITE flag is set, so leading whitespace is dropped.  No
1.1  mrg    special token padding is performed for macros.  */
1.1  mrg void
1.1  mrg cpp_output_line (cpp_reader *pfile, FILE *fp)
1.1  mrg {
1.1  mrg   for (const cpp_token *token = cpp_get_token (pfile);
1.1  mrg        token->type != CPP_EOF; )
1.1  mrg     {
1.1  mrg       cpp_output_token (token, fp);
1.1  mrg
1.1  mrg       /* The separator belongs to the token that comes next.  */
1.1  mrg       token = cpp_get_token (pfile);
1.1  mrg       if (token->flags & PREV_WHITE)
1.1  mrg         putc (' ', fp);
1.1  mrg     }
1.1  mrg
1.1  mrg   putc ('\n', fp);
1.1  mrg }
1.1  mrg
1.1  mrg /* Spell the tokens that remain on the current line into a
1.1  mrg    NUL-terminated string and return it.  The string is obtained from
1.1  mrg    xmalloc (and grown with xrealloc); the caller must free it.  When
1.1  mrg    DIR_NAME is non-null the string starts with '#', DIR_NAME and a
1.1  mrg    space.  */
1.1  mrg unsigned char *
1.1  mrg cpp_output_line_to_string (cpp_reader *pfile, const unsigned char *dir_name)
1.1  mrg {
1.1  mrg   unsigned int out = 0;
1.1  mrg   if (dir_name)
1.1  mrg     out = ustrlen (dir_name);
1.1  mrg
1.1  mrg   unsigned int alloced = 120 + out;
1.1  mrg   unsigned char *result = (unsigned char *) xmalloc (alloced);
1.1  mrg
1.1  mrg   /* With a null DIR_NAME there are no initial contents.  Otherwise
1.1  mrg      OUT must also cover the '#' and the space around the name.  */
1.1  mrg   if (dir_name)
1.1  mrg     {
1.1  mrg       sprintf ((char *) result, "#%s ", dir_name);
1.1  mrg       out += 2;
1.1  mrg     }
1.1  mrg
1.1  mrg   for (const cpp_token *token = cpp_get_token (pfile);
1.1  mrg        token->type != CPP_EOF; )
1.1  mrg     {
1.1  mrg       /* Room for the token, a possible space and the final NUL.  */
1.1  mrg       unsigned int len = cpp_token_len (token) + 2;
1.1  mrg       unsigned int needed = out + len;
1.1  mrg
1.1  mrg       if (needed > alloced)
1.1  mrg         {
1.1  mrg           /* Double the buffer, or jump straight to the required
1.1  mrg              size when doubling is not enough.  */
1.1  mrg           unsigned int doubled = alloced * 2;
1.1  mrg           alloced = needed > doubled ? needed : doubled;
1.1  mrg           result = (unsigned char *) xrealloc (result, alloced);
1.1  mrg         }
1.1  mrg
1.1  mrg       unsigned char *last = cpp_spell_token (pfile, token, &result[out], 0);
1.1  mrg       out = last - result;
1.1  mrg
1.1  mrg       token = cpp_get_token (pfile);
1.1  mrg       if (token->flags & PREV_WHITE)
1.1  mrg         result[out++] = ' ';
1.1  mrg     }
1.1  mrg
1.1  mrg   result[out] = '\0';
1.1  mrg   return result;
1.1  mrg }
1.1  mrg
1.1  mrg /* Memory buffers.  Changing these three constants can have a dramatic
1.1  mrg    effect on performance.  The values here are reasonable defaults,
1.1  mrg    but might be tuned.  If you adjust them, be sure to test across a
1.1  mrg    range of uses of cpplib, including heavy nested function-like macro
1.1  mrg    expansion.  Also check the change in peak memory usage (NJAMD is a
1.1  mrg    good tool for this).
1.1  mrg
1.1  mrg    MIN_BUFF_SIZE is the smallest length new_buff will allocate.
1.1  mrg    BUFF_SIZE_UPPER_BOUND (MIN_SIZE) is the largest free buffer that
1.1  mrg    _cpp_get_buff will reuse for a request of MIN_SIZE bytes.
1.1  mrg    EXTENDED_BUFF_SIZE (BUFF, MIN_EXTRA) is the size requested when
1.1  mrg    BUFF is extended: MIN_EXTRA plus twice the distance from BUFF's
1.1  mrg    cur pointer to its limit.  Every macro parameter is parenthesized
1.1  mrg    in the expansion so that expression arguments group as intended.  */
1.1  mrg #define MIN_BUFF_SIZE 8000
1.1  mrg #define BUFF_SIZE_UPPER_BOUND(MIN_SIZE) (MIN_BUFF_SIZE + (MIN_SIZE) * 3 / 2)
1.1  mrg #define EXTENDED_BUFF_SIZE(BUFF, MIN_EXTRA) \
1.1  mrg 	((MIN_EXTRA) + ((BUFF)->limit - (BUFF)->cur) * 2)
1.1  mrg
1.1  mrg #if MIN_BUFF_SIZE > BUFF_SIZE_UPPER_BOUND (0)
1.1  mrg   #error BUFF_SIZE_UPPER_BOUND must be at least as large as MIN_BUFF_SIZE!
1.1  mrg #endif
1.1  mrg
1.1  mrg /* Allocate a fresh buffer with room for LEN bytes; LEN is raised to
1.1  mrg    MIN_BUFF_SIZE if smaller and then rounded with CPP_ALIGN.  The
1.1  mrg    _cpp_buff control block normally sits directly behind the data,
1.1  mrg    so that buffer overflows will cause immediate chaos.  */
1.1  mrg static _cpp_buff *
1.1  mrg new_buff (size_t len)
1.1  mrg {
1.1  mrg   unsigned char *storage;
1.1  mrg   _cpp_buff *header;
1.1  mrg
1.1  mrg   len = len < MIN_BUFF_SIZE ? MIN_BUFF_SIZE : len;
1.1  mrg   len = CPP_ALIGN (len);
1.1  mrg
1.1  mrg #ifdef ENABLE_VALGRIND_ANNOTATIONS
1.1  mrg   /* Valgrind warns about uses of interior pointers, so here the
1.1  mrg      _cpp_buff struct goes first and the data follows it.  */
1.1  mrg   size_t slen = CPP_ALIGN2 (sizeof (_cpp_buff), 2 * DEFAULT_ALIGNMENT);
1.1  mrg   storage = XNEWVEC (unsigned char, len + slen);
1.1  mrg   header = (_cpp_buff *) storage;
1.1  mrg   storage += slen;
1.1  mrg #else
1.1  mrg   storage = XNEWVEC (unsigned char, len + sizeof (_cpp_buff));
1.1  mrg   header = (_cpp_buff *) (storage + len);
1.1  mrg #endif
1.1  mrg
1.1  mrg   /* The buffer starts out empty and unchained.  */
1.1  mrg   header->next = NULL;
1.1  mrg   header->base = storage;
1.1  mrg   header->cur = storage;
1.1  mrg   header->limit = storage + len;
1.1  mrg   return header;
1.1  mrg }
1.1  mrg
1.1  mrg /* Give the chain of buffers starting at BUFF back to PFILE by
1.1  mrg    splicing the whole chain onto the front of the free list.  */
1.1  mrg void
1.1  mrg _cpp_release_buff (cpp_reader *pfile, _cpp_buff *buff)
1.1  mrg {
1.1  mrg   _cpp_buff *tail;
1.1  mrg
1.1  mrg   /* Find the last buffer of the chain.  */
1.1  mrg   for (tail = buff; tail->next != NULL; tail = tail->next)
1.1  mrg     continue;
1.1  mrg
1.1  mrg   tail->next = pfile->free_buffs;
1.1  mrg   pfile->free_buffs = buff;
1.1  mrg }
1.1  mrg
1.1  mrg /* Hand out a buffer with room for at least MIN_SIZE bytes.  The
1.1  mrg    free list is searched for the first buffer whose capacity is at
1.1  mrg    least MIN_SIZE and at most BUFF_SIZE_UPPER_BOUND (MIN_SIZE); that
1.1  mrg    buffer is unlinked, rewound and returned.  If none qualifies a
1.1  mrg    new buffer is created with new_buff.  */
1.1  mrg _cpp_buff *
1.1  mrg _cpp_get_buff (cpp_reader *pfile, size_t min_size)
1.1  mrg {
1.1  mrg   _cpp_buff **link = &pfile->free_buffs;
1.1  mrg   _cpp_buff *candidate;
1.1  mrg
1.1  mrg   while ((candidate = *link) != NULL)
1.1  mrg     {
1.1  mrg       size_t capacity = candidate->limit - candidate->base;
1.1  mrg
1.1  mrg       /* Take a buffer that's big enough, but don't waste one that's
1.1  mrg          way too big.  */
1.1  mrg       if (capacity >= min_size
1.1  mrg           && capacity <= BUFF_SIZE_UPPER_BOUND (min_size))
1.1  mrg         {
1.1  mrg           *link = candidate->next;
1.1  mrg           candidate->next = NULL;
1.1  mrg           candidate->cur = candidate->base;
1.1  mrg           return candidate;
1.1  mrg         }
1.1  mrg
1.1  mrg       link = &candidate->next;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Nothing suitable on the free list.  */
1.1  mrg   return new_buff (min_size);
1.1  mrg }
1.1  mrg
1.1  mrg /* Obtain a buffer large enough for the uncommitted remainder of
1.1  mrg    BUFF plus at least MIN_EXTRA further bytes, copy that remainder
1.1  mrg    to the start of the new buffer, chain the new buffer after BUFF
1.1  mrg    and return it.  */
1.1  mrg _cpp_buff *
1.1  mrg _cpp_append_extend_buff (cpp_reader *pfile, _cpp_buff *buff, size_t min_extra)
1.1  mrg {
1.1  mrg   const size_t wanted = EXTENDED_BUFF_SIZE (buff, min_extra);
1.1  mrg   _cpp_buff *extension = _cpp_get_buff (pfile, wanted);
1.1  mrg
1.1  mrg   buff->next = extension;
1.1  mrg   memcpy (extension->base, buff->cur, BUFF_ROOM (buff));
1.1  mrg
1.1  mrg   return extension;
1.1  mrg }
1.1  mrg
1.1  mrg /* Obtain a buffer large enough for the uncommitted remainder of
1.1  mrg    *PBUFF plus at least MIN_EXTRA further bytes and copy that
1.1  mrg    remainder to the start of the new buffer.  The new buffer is
1.1  mrg    chained in front of the old one and *PBUFF is updated to point
1.1  mrg    to the new buffer.  */
1.1  mrg void
1.1  mrg _cpp_extend_buff (cpp_reader *pfile, _cpp_buff **pbuff, size_t min_extra)
1.1  mrg {
1.1  mrg   _cpp_buff *previous = *pbuff;
1.1  mrg   const size_t wanted = EXTENDED_BUFF_SIZE (previous, min_extra);
1.1  mrg   _cpp_buff *replacement = _cpp_get_buff (pfile, wanted);
1.1  mrg
1.1  mrg   memcpy (replacement->base, previous->cur, BUFF_ROOM (previous));
1.1  mrg   replacement->next = previous;
1.1  mrg   *pbuff = replacement;
1.1  mrg }
1.1  mrg
1.1  mrg /* Release the memory of every buffer on the chain that starts at
1.1  mrg    BUFF.  A null BUFF is accepted and does nothing.  */
1.1  mrg void
1.1  mrg _cpp_free_buff (_cpp_buff *buff)
1.1  mrg {
1.1  mrg   while (buff != NULL)
1.1  mrg     {
1.1  mrg       /* Read the link first; freeing invalidates the control block.  */
1.1  mrg       _cpp_buff *following = buff->next;
1.1  mrg
1.1  mrg #ifdef ENABLE_VALGRIND_ANNOTATIONS
1.1  mrg       free (buff);
1.1  mrg #else
1.1  mrg       free (buff->base);
1.1  mrg #endif
1.1  mrg
1.1  mrg       buff = following;
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Carve LEN bytes of permanent, unaligned storage out of u_buff
1.1  mrg    and return a pointer to them.  When the current buffer has too
1.1  mrg    little room left, a new one is obtained from _cpp_get_buff and
1.1  mrg    put at the head of the u_buff chain first.  */
1.1  mrg unsigned char *
1.1  mrg _cpp_unaligned_alloc (cpp_reader *pfile, size_t len)
1.1  mrg {
1.1  mrg   _cpp_buff *buff = pfile->u_buff;
1.1  mrg
1.1  mrg   if (len > (size_t) (buff->limit - buff->cur))
1.1  mrg     {
1.1  mrg       buff = _cpp_get_buff (pfile, len);
1.1  mrg       buff->next = pfile->u_buff;
1.1  mrg       pfile->u_buff = buff;
1.1  mrg     }
1.1  mrg
1.1  mrg   unsigned char *result = buff->cur;
1.1  mrg   buff->cur = result + len;
1.1  mrg   return result;
1.1  mrg }
1.1  mrg
1.1  mrg /* Carve LEN bytes of permanent storage out of a_buff and return a
1.1  mrg    pointer to them.  When the current buffer has too little room
1.1  mrg    left, a new one is obtained from _cpp_get_buff and put at the
1.1  mrg    head of the a_buff chain first.
1.1  mrg
1.1  mrg    a_buff is also the buffer used for growing allocations while a
1.1  mrg    macro replacement list is saved in a #define, and while an answer
1.1  mrg    to an assertion is parsed in #assert, #unassert or #if (and so
1.1  mrg    possibly whilst macros are being expanded).  This function
1.1  mrg    therefore must not be used by any code those might call:
1.1  mrg    specifically the lexer and the guts of the macro expander.
1.1  mrg
1.1  mrg    All other existing uses clearly fit this restriction: storing
1.1  mrg    registered pragmas during initialization.  */
1.1  mrg unsigned char *
1.1  mrg _cpp_aligned_alloc (cpp_reader *pfile, size_t len)
1.1  mrg {
1.1  mrg   _cpp_buff *buff = pfile->a_buff;
1.1  mrg
1.1  mrg   if (len > (size_t) (buff->limit - buff->cur))
1.1  mrg     {
1.1  mrg       buff = _cpp_get_buff (pfile, len);
1.1  mrg       buff->next = pfile->a_buff;
1.1  mrg       pfile->a_buff = buff;
1.1  mrg     }
1.1  mrg
1.1  mrg   unsigned char *result = buff->cur;
1.1  mrg   buff->cur = result + len;
1.1  mrg   return result;
1.1  mrg }
1.1  mrg
1.1  mrg /* Commit the SIZE bytes at the front of a_buff and return a pointer
1.1  mrg    to them.  If the hash table provides an alloc_subobject hook, the
1.1  mrg    bytes are instead copied into storage obtained from that hook,
1.1  mrg    the front of a_buff is left where it is, and the copy is
1.1  mrg    returned.  */
1.1  mrg
1.1  mrg void *
1.1  mrg _cpp_commit_buff (cpp_reader *pfile, size_t size)
1.1  mrg {
1.1  mrg   void *ptr = BUFF_FRONT (pfile->a_buff);
1.1  mrg
1.1  mrg   if (!pfile->hash_table->alloc_subobject)
1.1  mrg     {
1.1  mrg       BUFF_FRONT (pfile->a_buff) += size;
1.1  mrg       return ptr;
1.1  mrg     }
1.1  mrg
1.1  mrg   void *copy = pfile->hash_table->alloc_subobject (size);
1.1  mrg   memcpy (copy, ptr, size);
1.1  mrg   return copy;
1.1  mrg }
1.1  mrg
1.1  mrg /* Report which member of TOK's value union is in use, as decided by
1.1  mrg    the token's spelling category, its type and its NAMED_OP flag.  */
1.1  mrg
1.1  mrg enum cpp_token_fld_kind
1.1  mrg cpp_token_val_index (const cpp_token *tok)
1.1  mrg {
1.1  mrg   const enum spell_type category = TOKEN_SPELL (tok);
1.1  mrg
1.1  mrg   if (category == SPELL_IDENT)
1.1  mrg     return CPP_TOKEN_FLD_NODE;
1.1  mrg
1.1  mrg   if (category == SPELL_LITERAL)
1.1  mrg     return CPP_TOKEN_FLD_STR;
1.1  mrg
1.1  mrg   if (category == SPELL_OPERATOR)
1.1  mrg     {
1.1  mrg       /* Operators which were originally spelled as an identifier
1.1  mrg          keep around the node for the exact spelling.  */
1.1  mrg       if (tok->flags & NAMED_OP)
1.1  mrg         return CPP_TOKEN_FLD_NODE;
1.1  mrg       if (tok->type == CPP_PASTE)
1.1  mrg         return CPP_TOKEN_FLD_TOKEN_NO;
1.1  mrg       return CPP_TOKEN_FLD_NONE;
1.1  mrg     }
1.1  mrg
1.1  mrg   if (category == SPELL_NONE)
1.1  mrg     {
1.1  mrg       if (tok->type == CPP_MACRO_ARG)
1.1  mrg         return CPP_TOKEN_FLD_ARG_NO;
1.1  mrg       if (tok->type == CPP_PADDING)
1.1  mrg         return CPP_TOKEN_FLD_SOURCE;
1.1  mrg       if (tok->type == CPP_PRAGMA)
1.1  mrg         return CPP_TOKEN_FLD_PRAGMA;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Everything else carries no value.  */
1.1  mrg   return CPP_TOKEN_FLD_NONE;
1.1  mrg }
1.1  mrg
1.1  mrg /* All tokens lexed in R after calling this function will be forced to
1.1  mrg    have their location_t to be LOC, until
1.1  mrg    cpp_stop_forcing_token_locations is called for R.  This function
1.1  mrg    only records LOC in R's forced_token_location field.  */
1.1  mrg
1.1  mrg void
1.1  mrg cpp_force_token_locations (cpp_reader *r, location_t loc)
1.1  mrg {
1.1  mrg   r->forced_token_location = loc;
1.1  mrg }
1.1  mrg
1.1  mrg /* Go back to assigning locations naturally for lexed tokens.  This
1.1  mrg    resets R's forced_token_location field to 0, undoing a previous
1.1  mrg    call to cpp_force_token_locations.  */
1.1  mrg
1.1  mrg void
1.1  mrg cpp_stop_forcing_token_locations (cpp_reader *r)
1.1  mrg {
1.1  mrg   r->forced_token_location = 0;
1.1  mrg }
1.1  mrg
1.1  mrg /* PEEK points at a backslash.  If that backslash escapes an end of
1.1  mrg    line (a newline, a carriage return, or a carriage return followed
1.1  mrg    by a newline), return a pointer to the character after the line
1.1  mrg    ending, repeating the check while that character is itself a
1.1  mrg    backslash.  The pointer is never advanced to LIMIT or beyond; in
1.1  mrg    that case, and when the backslash escapes nothing, the position
1.1  mrg    reached so far is returned.  */
1.1  mrg
1.1  mrg static const unsigned char *
1.1  mrg do_peek_backslash (const unsigned char *peek, const unsigned char *limit)
1.1  mrg {
1.1  mrg   for (;;)
1.1  mrg     {
1.1  mrg       const unsigned char *after;
1.1  mrg
1.1  mrg       if (__builtin_expect (peek[1] == '\n', true))
1.1  mrg         after = peek + 2;
1.1  mrg       else if (__builtin_expect (peek[1] == '\r', false))
1.1  mrg         /* A newline straight after the carriage return belongs to
1.1  mrg            the same line ending.  */
1.1  mrg         after = peek + (peek[2] == '\n' ? 3 : 2);
1.1  mrg       else
1.1  mrg         return peek;
1.1  mrg
1.1  mrg       if (!__builtin_expect (after < limit, true))
1.1  mrg         return peek;
1.1  mrg
1.1  mrg       peek = after;
1.1  mrg
1.1  mrg       /* The user might be perverse and escape several line endings
1.1  mrg          in a row; go round again in that case.  */
1.1  mrg       if (*peek != '\\')
1.1  mrg         return peek;
1.1  mrg     }
1.1  mrg }
1.1  mrg
1.1  mrg /* Return PEEK unless it points at a backslash, in which case the
1.1  mrg    result of do_peek_backslash for PEEK and LIMIT is returned.  */
1.1  mrg static const unsigned char *
1.1  mrg do_peek_next (const unsigned char *peek, const unsigned char *limit)
1.1  mrg {
1.1  mrg   if (__builtin_expect (*peek == '\\', false))
1.1  mrg     return do_peek_backslash (peek, limit);
1.1  mrg
1.1  mrg   return peek;
1.1  mrg }
1.1  mrg
1.1  mrg /* Step backwards from PEEK and return a pointer to the previous
1.1  mrg    character, or NULL if PEEK is already at BOUND.  If the previous
1.1  mrg    character is a line ending (a newline, a carriage return, or a
1.1  mrg    carriage return followed by a newline) that is preceded by a
1.1  mrg    backslash, the escaped line ending is looked through and the
1.1  mrg    search continues in front of the backslash.  Nothing in front of
1.1  mrg    BOUND is ever read.  */
1.1  mrg static const unsigned char *
1.1  mrg do_peek_prev (const unsigned char *peek, const unsigned char *bound)
1.1  mrg {
1.1  mrg   if (peek == bound)
1.1  mrg     return NULL;
1.1  mrg
1.1  mrg   unsigned char c = *--peek;
1.1  mrg
1.1  mrg   /* Only a line ending needs a closer look.  The second test must
1.1  mrg      be against the carriage return '\r', not the letter 'r'.  */
1.1  mrg   if (__builtin_expect (c == '\n', false)
1.1  mrg       || __builtin_expect (c == '\r', false))
1.1  mrg     {
1.1  mrg       if (peek == bound)
1.1  mrg         return peek;
1.1  mrg
1.1  mrg       /* IX is the offset from PEEK of the character in front of the
1.1  mrg          line ending; a "\r\n" pair counts as one line ending.  */
1.1  mrg       int ix = -1;
1.1  mrg       if (c == '\n' && peek[ix] == '\r')
1.1  mrg         {
1.1  mrg           if (peek + ix == bound)
1.1  mrg             return peek;
1.1  mrg           ix--;
1.1  mrg         }
1.1  mrg
1.1  mrg       /* An escaped line ending: carry on from the backslash.  */
1.1  mrg       if (peek[ix] == '\\')
1.1  mrg         return do_peek_prev (peek + ix, bound);
1.1  mrg
1.1  mrg       return peek;
1.1  mrg     }
1.1  mrg   else
1.1  mrg     return peek;
1.1  mrg }
1.1  mrg
1.1  mrg /* The first character of identifier MATCH is taken to have been
1.1  mrg    seen at PEEK[-1].  Compare the rest of MATCH with the text at
1.1  mrg    PEEK, retrying each mismatch once through do_peek_next.  If all
1.1  mrg    of MATCH is found and is not followed by a further identifier
1.1  mrg    character, skip the spaces, tabs and escaped line endings after
1.1  mrg    it and return a pointer to what follows.  Otherwise return
1.1  mrg    NULL.  */
1.1  mrg
1.1  mrg static const unsigned char *
1.1  mrg do_peek_ident (const char *match, const unsigned char *peek,
1.1  mrg                const unsigned char *limit)
1.1  mrg {
1.1  mrg   while (*++match)
1.1  mrg     {
1.1  mrg       if (*peek != *match)
1.1  mrg         {
1.1  mrg           peek = do_peek_next (peek, limit);
1.1  mrg           if (*peek != *match)
1.1  mrg             return NULL;
1.1  mrg         }
1.1  mrg       peek++;
1.1  mrg     }
1.1  mrg
1.1  mrg   /* Must now not be looking at an identifier char.  */
1.1  mrg   peek = do_peek_next (peek, limit);
1.1  mrg   if (ISIDNUM (*peek))
1.1  mrg     return NULL;
1.1  mrg
1.1  mrg   /* Skip control-line whitespace.  */
1.1  mrg   for (;;)
1.1  mrg     {
1.1  mrg       while (*peek == ' ' || *peek == '\t')
1.1  mrg         peek++;
1.1  mrg
1.1  mrg       if (__builtin_expect (*peek != '\\', true))
1.1  mrg         break;
1.1  mrg
1.1  mrg       /* Stop if still at a backslash afterwards; otherwise there
1.1  mrg          may be more whitespace to skip.  */
1.1  mrg       peek = do_peek_backslash (peek, limit);
1.1  mrg       if (*peek == '\\')
1.1  mrg         break;
1.1  mrg     }
1.1  mrg
1.1  mrg   return peek;
1.1  mrg }
1.1  mrg
1.1  mrg /* Are we looking at a module control line starting as PEEK - 1?
1.1  mrg
1.1  mrg    C is the first character of the candidate keyword (presumably the
1.1  mrg    character at PEEK[-1]); PEEK points at the text after it and
1.1  mrg    LIMIT is handed on to the peeking helpers.  The text must spell
1.1  mrg    "import", "__import" or "module", or "export" followed by
1.1  mrg    "import" or "module".  The first character after the keyword and
1.1  mrg    any following whitespace must then be able to start one of the
1.1  mrg    tokens listed in the comments in the body; PFILE is consulted for
1.1  mrg    the rliterals and digraphs options while checking that.  Returns
1.1  mrg    true if all of this holds and false otherwise.  */
1.1  mrg
1.1  mrg static bool
1.1  mrg do_peek_module (cpp_reader *pfile, unsigned char c,
1.1  mrg 		const unsigned char *peek, const unsigned char *limit)
1.1  mrg {
1.1  mrg   bool import = false; /* Set once "import" or "__import" was matched.  */
1.1  mrg
1.1  mrg   if (__builtin_expect (c == 'e', false))
1.1  mrg     {
1.1  mrg       if (!((peek[0] == 'x' || peek[0] == '\\')
1.1  mrg 	    && (peek = do_peek_ident ("export", peek, limit))))
1.1  mrg 	return false;
1.1  mrg
1.1  mrg       /* export, peek for import or module.  No need to peek __import
1.1  mrg 	 here.  */
1.1  mrg       if (peek[0] == 'i')
1.1  mrg 	{
1.1  mrg 	  if (!((peek[1] == 'm' || peek[1] == '\\')
1.1  mrg 		&& (peek = do_peek_ident ("import", peek + 1, limit))))
1.1  mrg 	    return false;
1.1  mrg 	  import = true;
1.1  mrg 	}
1.1  mrg       else if (peek[0] == 'm')
1.1  mrg 	{
1.1  mrg 	  if (!((peek[1] == 'o' || peek[1] == '\\')
1.1  mrg 		&& (peek = do_peek_ident ("module", peek + 1, limit))))
1.1  mrg 	    return false;
1.1  mrg 	}
1.1  mrg       else
1.1  mrg 	return false;
1.1  mrg     }
1.1  mrg   else if (__builtin_expect (c == 'i', false))
1.1  mrg     {
1.1  mrg       if (!((peek[0] == 'm' || peek[0] == '\\')
1.1  mrg 	    && (peek = do_peek_ident ("import", peek, limit))))
1.1  mrg 	return false;
1.1  mrg       import = true;
1.1  mrg     }
1.1  mrg   else if (__builtin_expect (c == '_', false))
1.1  mrg     {
1.1  mrg       /* Needed for translated includes.   */
1.1  mrg       if (!((peek[0] == '_' || peek[0] == '\\')
1.1  mrg 	    && (peek = do_peek_ident ("__import", peek, limit))))
1.1  mrg 	return false;
1.1  mrg       import = true;
1.1  mrg     }
1.1  mrg   else if (__builtin_expect (c == 'm', false))
1.1  mrg     {
1.1  mrg       if (!((peek[0] == 'o' || peek[0] == '\\')
1.1  mrg 	    && (peek = do_peek_ident ("module", peek, limit))))
1.1  mrg 	return false;
1.1  mrg     }
1.1  mrg   else
1.1  mrg     return false;
1.1  mrg
1.1  mrg   /* Peek the next character to see if it's good enough.  We'll be at
1.1  mrg      the first non-whitespace char, including skipping an escaped
1.1  mrg      newline.  */
1.1  mrg   /* ... import followed by identifier, ':', '<' or header-name
1.1  mrg      preprocessing tokens, or module followed by identifier, ':' or
1.1  mrg      ';' preprocessing tokens.  */
1.1  mrg   unsigned char p = *peek++;
1.1  mrg
1.1  mrg   /* A character literal is ... single quotes, ... optionally preceded
1.1  mrg      by u8, u, U, or L */
1.1  mrg   /* A string-literal is a ... double quotes, optionally prefixed by
1.1  mrg      R, u8, u8R, u, uR, U, UR, L, or LR */
1.1  mrg   if (p == 'u')
1.1  mrg     {
1.1  mrg       peek = do_peek_next (peek, limit);
1.1  mrg       if (*peek == '8')
1.1  mrg 	{
1.1  mrg 	  peek++;
1.1  mrg 	  goto peek_u8;
1.1  mrg 	}
1.1  mrg       goto peek_u;
1.1  mrg     }
1.1  mrg   else if (p == 'U' || p == 'L')
1.1  mrg     {
1.1  mrg     peek_u8:
1.1  mrg       peek = do_peek_next (peek, limit);
1.1  mrg     peek_u:
1.1  mrg       if (*peek == '\"' || *peek == '\'')
1.1  mrg 	return false;
1.1  mrg
1.1  mrg       if (*peek == 'R')
1.1  mrg 	goto peek_R;
1.1  mrg       /* Identifier. Ok.  */
1.1  mrg     }
1.1  mrg   else if (p == 'R')
1.1  mrg     {
1.1  mrg     peek_R:
1.1  mrg       if (CPP_OPTION (pfile, rliterals))
1.1  mrg 	{
1.1  mrg 	  peek = do_peek_next (peek, limit);
1.1  mrg 	  if (*peek == '\"')
1.1  mrg 	    return false;
1.1  mrg 	}
1.1  mrg       /* Identifier. Ok.  */
1.1  mrg     }
1.1  mrg   else if ('Z' - 'A' == 25
1.1  mrg 	   ? ((p >= 'A' && p <= 'Z') || (p >= 'a' && p <= 'z') || p == '_')
1.1  mrg 	   : ISIDST (p))
1.1  mrg     {
1.1  mrg       /* Identifier.  Ok. */
1.1  mrg     }
1.1  mrg   else if (p == '<')
1.1  mrg     {
1.1  mrg       /* Maybe angle header, ok for import.  Reject
1.1  mrg 	 '<=', '<<' digraph:'<:'.  */
1.1  mrg       if (!import)
1.1  mrg 	return false;
1.1  mrg       peek = do_peek_next (peek, limit);
1.1  mrg       if (*peek == '=' || *peek == '<'
1.1  mrg 	  || (*peek == ':' && CPP_OPTION (pfile, digraphs)))
1.1  mrg 	return false;
1.1  mrg     }
1.1  mrg   else if (p == ';')
1.1  mrg     {
1.1  mrg       /* SEMICOLON, ok for module.  */
1.1  mrg       if (import)
1.1  mrg 	return false;
1.1  mrg     }
1.1  mrg   else if (p == '"')
1.1  mrg     {
1.1  mrg       /* STRING, ok for import.  */
1.1  mrg       if (!import)
1.1  mrg 	return false;
1.1  mrg     }
1.1  mrg   else if (p == ':')
1.1  mrg     {
1.1  mrg       /* Maybe COLON, ok.  Reject '::', digraph:':>'.  */
1.1  mrg       peek = do_peek_next (peek, limit);
1.1  mrg       if (*peek == ':' || (*peek == '>' && CPP_OPTION (pfile, digraphs)))
1.1  mrg 	return false;
1.1  mrg     }
1.1  mrg   else
1.1  mrg     /* FIXME: Detect a unicode character, excluding those not
1.1  mrg        permitted as the initial character. [lex.name]/1.  I presume
1.1  mrg        we need to check the \[uU] spellings, and directly using
1.1  mrg        Unicode in say UTF8 form?  Or perhaps we do the phase-1
1.1  mrg        conversion of UTF8 to universal-character-names?  */
1.1  mrg     return false;
1.1  mrg
1.1  mrg   return true; /* Every check above passed.  */
1.1  mrg }
1.1  mrg
1.1  mrg /* Directives-only scanning.  Somewhat more relaxed than correct
1.1  mrg    parsing -- some ill-formed programs will not be rejected.  */
1.1  mrg
1.1  mrg void
1.1  mrg cpp_directive_only_process (cpp_reader *pfile,
1.1  mrg 			    void *data,
1.1  mrg 			    void (*cb) (cpp_reader *, CPP_DO_task, void *, ...))
1.1  mrg {
1.1  mrg   bool module_p = CPP_OPTION (pfile, module_directives);
1.1  mrg
1.1  mrg   do
1.1  mrg     {
1.1  mrg     restart:
1.1  mrg       /* Buffer initialization, but no line cleaning. */
1.1  mrg       cpp_buffer *buffer = pfile->buffer;
1.1  mrg       buffer->cur_note = buffer->notes_used = 0;
1.1  mrg       buffer->cur = buffer->line_base = buffer->next_line;
1.1  mrg       buffer->need_line = false;
1.1  mrg       /* Files always end in a newline or carriage return.  We rely on this for
1.1  mrg 	 character peeking safety.  */
1.1  mrg       gcc_assert (buffer->rlimit[0] == '\n' || buffer->rlimit[0] == '\r');
1.1  mrg
1.1  mrg       const unsigned char *base = buffer->cur;
1.1  mrg       unsigned line_count = 0;
1.1  mrg       const unsigned char *line_start = base;
1.1  mrg
1.1  mrg       bool bol = true;
1.1  mrg       bool raw = false;
1.1  mrg
1.1  mrg       const unsigned char *lwm = base;
1.1  mrg       for (const unsigned char *pos = base, *limit = buffer->rlimit;
1.1  mrg 	   pos < limit;)
1.1  mrg 	{
1.1  mrg 	  unsigned char c = *pos++;
1.1  mrg 	  /* This matches the switch in _cpp_lex_direct.  */
1.1  mrg 	  switch (c)
1.1  mrg 	    {
1.1  mrg 	    case ' ': case '\t': case '\f': case '\v':
1.1  mrg 	      /* Whitespace, do nothing.  */
1.1  mrg 	      break;
1.1  mrg
1.1  mrg 	    case '\r': /* MAC line ending, or Windows \r\n  */
1.1  mrg 	      if (*pos == '\n')
1.1  mrg 		pos++;
1.1  mrg 	      /* FALLTHROUGH */
1.1  mrg
1.1  mrg 	    case '\n':
1.1  mrg 	      bol = true;
1.1  mrg
1.1  mrg 	    next_line:
1.1  mrg 	      CPP_INCREMENT_LINE (pfile, 0);
1.1  mrg 	      line_count++;
1.1  mrg 	      line_start = pos;
1.1  mrg 	      break;
1.1  mrg
1.1  mrg 	    case '\\':
1.1  mrg 	      /* <backslash><newline> is removed, and doesn't undo any
1.1  mrg 		 preceeding escape or whatnot.  */
1.1  mrg 	      if (*pos == '\n')
1.1  mrg 		{
1.1  mrg 		  pos++;
1.1  mrg 		  goto next_line;
1.1  mrg 		}
1.1  mrg 	      else if (*pos == '\r')
1.1  mrg 		{
1.1  mrg 		  if (pos[1] == '\n')
1.1  mrg 		    pos++;
1.1  mrg 		  pos++;
1.1  mrg 		  goto next_line;
1.1  mrg 		}
1.1  mrg 	      goto dflt;
1.1  mrg
1.1  mrg 	    case '#':
1.1  mrg 	      if (bol)
1.1  mrg 		{
1.1  mrg 		  /* Line directive.  */
1.1  mrg 		  if (pos - 1 > base && !pfile->state.skipping)
1.1  mrg 		    cb (pfile, CPP_DO_print, data,
1.1  mrg 			line_count, base, pos - 1 - base);
1.1  mrg
1.1  mrg 		  /* Prep things for directive handling. */
1.1  mrg 		  buffer->next_line = pos;
1.1  mrg 		  buffer->need_line = true;
1.1  mrg 		  bool ok = _cpp_get_fresh_line (pfile);
1.1  mrg 		  gcc_checking_assert (ok);
1.1  mrg
1.1  mrg 		  /* Ensure proper column numbering for generated
1.1  mrg 		     error messages. */
1.1  mrg 		  buffer->line_base -= pos - line_start;
1.1  mrg
1.1  mrg 		  _cpp_handle_directive (pfile, line_start + 1 != pos);
1.1  mrg
1.1  mrg 		  /* Sanitize the line settings.  Duplicate #include's can
1.1  mrg 		     mess things up. */
1.1  mrg 		  // FIXME: Necessary?
1.1  mrg 		  pfile->line_table->highest_location
1.1  mrg 		    = pfile->line_table->highest_line;
1.1  mrg
1.1  mrg 		  if (!pfile->state.skipping
1.1  mrg 		      && pfile->buffer->next_line < pfile->buffer->rlimit)
1.1  mrg 		    cb (pfile, CPP_DO_location, data,
1.1  mrg 			pfile->line_table->highest_line);
1.1  mrg
1.1  mrg 		  goto restart;
1.1  mrg 		}
1.1  mrg 	      goto dflt;
1.1  mrg
1.1  mrg 	    case '/':
1.1  mrg 	      {
1.1  mrg 		const unsigned char *peek = do_peek_next (pos, limit);
1.1  mrg 		if (!(*peek == '/' || *peek == '*'))
1.1  mrg 		  goto dflt;
1.1  mrg
1.1  mrg 		/* Line or block comment  */
1.1  mrg 		bool is_block = *peek == '*';
1.1  mrg 		bool star = false;
1.1  mrg 		bool esc = false;
1.1  mrg 		location_t sloc
1.1  mrg 		  = linemap_position_for_column (pfile->line_table,
1.1  mrg 						 pos - line_start);
1.1  mrg
1.1  mrg 		while (pos < limit)
1.1  mrg 		  {
1.1  mrg 		    char c = *pos++;
1.1  mrg 		    switch (c)
1.1  mrg 		      {
1.1  mrg 		      case '\\':
1.1  mrg 			esc = true;
1.1  mrg 			break;
1.1  mrg
1.1  mrg 		      case '\r':
1.1  mrg 			if (*pos == '\n')
1.1  mrg 			  pos++;
1.1  mrg 			/* FALLTHROUGH  */
1.1  mrg
1.1  mrg 		      case '\n':
1.1  mrg 			{
1.1  mrg 			  CPP_INCREMENT_LINE (pfile, 0);
1.1  mrg 			  line_count++;
1.1  mrg 			  line_start = pos;
1.1  mrg 			  if (!esc && !is_block)
1.1  mrg 			    {
1.1  mrg 			      bol = true;
1.1  mrg 			      goto done_comment;
1.1  mrg 			    }
1.1  mrg 			}
1.1  mrg 			if (!esc)
1.1  mrg 			  star = false;
1.1  mrg 			esc = false;
1.1  mrg 			break;
1.1  mrg
1.1  mrg 		      case '*':
1.1  mrg 			if (pos > peek)
1.1  mrg 			  star = is_block;
1.1  mrg 			esc = false;
1.1  mrg 			break;
1.1  mrg
1.1  mrg 		      case '/':
1.1  mrg 			if (star)
1.1  mrg 			  goto done_comment;
1.1  mrg 			/* FALLTHROUGH  */
1.1  mrg
1.1  mrg 		      default:
1.1  mrg 			star = false;
1.1  mrg 			esc = false;
1.1  mrg 			break;
1.1  mrg 		      }
1.1  mrg 		  }
1.1  mrg 		if (pos < limit || is_block)
1.1  mrg 		  cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
1.1  mrg 				       "unterminated comment");
1.1  mrg 	      done_comment:
1.1  mrg 		lwm = pos;
1.1  mrg 		break;
1.1  mrg 	      }
1.1  mrg
1.1  mrg 	    case '\'':
1.1  mrg 	      if (!CPP_OPTION (pfile, digit_separators))
1.1  mrg 		goto delimited_string;
1.1  mrg
1.1  mrg 	      /* Possibly a number punctuator.  */
1.1  mrg 	      if (!ISIDNUM (*do_peek_next (pos, limit)))
1.1  mrg 		goto delimited_string;
1.1  mrg
1.1  mrg 	      goto quote_peek;
1.1  mrg
1.1  mrg 	    case '\"':
1.1  mrg 	      if (!CPP_OPTION (pfile, rliterals))
1.1  mrg 		goto delimited_string;
1.1  mrg
1.1  mrg 	    quote_peek:
1.1  mrg 	      {
1.1  mrg 		/* For ' see if it's a number punctuator
1.1  mrg 		   \.?<digit>(<digit>|<identifier-nondigit>
1.1  mrg 		   |'<digit>|'<nondigit>|[eEpP]<sign>|\.)* */
1.1  mrg 		/* For " see if it's a raw string
1.1  mrg 		   {U,L,u,u8}R.  This includes CPP_NUMBER detection,
1.1  mrg 		   because that could be 0e+R.  */
1.1  mrg 		const unsigned char *peek = pos - 1;
1.1  mrg 		bool quote_first = c == '"';
1.1  mrg 		bool quote_eight = false;
1.1  mrg 		bool maybe_number_start = false;
1.1  mrg 		bool want_number = false;
1.1  mrg
1.1  mrg 		while ((peek = do_peek_prev (peek, lwm)))
1.1  mrg 		  {
1.1  mrg 		    unsigned char p = *peek;
1.1  mrg 		    if (quote_first)
1.1  mrg 		      {
1.1  mrg 			if (!raw)
1.1  mrg 			  {
1.1  mrg 			    if (p != 'R')
1.1  mrg 			      break;
1.1  mrg 			    raw = true;
1.1  mrg 			    continue;
1.1  mrg 			  }
1.1  mrg
1.1  mrg 			quote_first = false;
1.1  mrg 			if (p == 'L' || p == 'U' || p == 'u')
1.1  mrg 			  ;
1.1  mrg 			else if (p == '8')
1.1  mrg 			  quote_eight = true;
1.1  mrg 			else
1.1  mrg 			  goto second_raw;
1.1  mrg 		      }
1.1  mrg 		    else if (quote_eight)
1.1  mrg 		      {
1.1  mrg 			if (p != 'u')
1.1  mrg 			  {
1.1  mrg 			    raw = false;
1.1  mrg 			    break;
1.1  mrg 			  }
1.1  mrg 			quote_eight = false;
1.1  mrg 		      }
1.1  mrg 		    else if (c == '"')
1.1  mrg 		      {
1.1  mrg 		      second_raw:;
1.1  mrg 			if (!want_number && ISIDNUM (p))
1.1  mrg 			  {
1.1  mrg 			    raw = false;
1.1  mrg 			    break;
1.1  mrg 			  }
1.1  mrg 		      }
1.1  mrg
1.1  mrg 		    if (ISDIGIT (p))
1.1  mrg 		      maybe_number_start = true;
1.1  mrg 		    else if (p == '.')
1.1  mrg 		      want_number = true;
1.1  mrg 		    else if (ISIDNUM (p))
1.1  mrg 		      maybe_number_start = false;
1.1  mrg 		    else if (p == '+' || p == '-')
1.1  mrg 		      {
1.1  mrg 			if (const unsigned char *peek_prev
1.1  mrg 			    = do_peek_prev (peek, lwm))
1.1  mrg 			  {
1.1  mrg 			    p = *peek_prev;
1.1  mrg 			    if (p == 'e' || p == 'E'
1.1  mrg 				|| p == 'p' || p == 'P')
1.1  mrg 			      {
1.1  mrg 				want_number = true;
1.1  mrg 				maybe_number_start = false;
1.1  mrg 			      }
1.1  mrg 			    else
1.1  mrg 			      break;
1.1  mrg 			  }
1.1  mrg 			else
1.1  mrg 			  break;
1.1  mrg 		      }
1.1  mrg 		    else if (p == '\'' || p == '\"')
1.1  mrg 		      {
1.1  mrg 			/* If this is lwm, this must be the end of a
1.1  mrg 			   previous string.  So this is a trailing
1.1  mrg 			   literal type, (a) if those are allowed,
1.1  mrg 			   and (b) maybe_number_start and want_number
1.1  mrg 			   are both false.  Otherwise this must be a
1.1  mrg 			   CPP_NUMBER because we've met another ', and
1.1  mrg 			   we'd have checked that in its own right.  */
1.1  mrg 			if (peek == lwm && CPP_OPTION (pfile, uliterals))
1.1  mrg 			  {
1.1  mrg 			    if  (!maybe_number_start && !want_number)
1.1  mrg 			      /* Must be a literal type.  */
1.1  mrg 			      raw = false;
1.1  mrg 			  }
1.1  mrg 			else if (p == '\''
1.1  mrg 				 && CPP_OPTION (pfile, digit_separators))
1.1  mrg 			  maybe_number_start = true;
1.1  mrg 			break;
1.1  mrg 		      }
1.1  mrg 		    else if (c == '\'')
1.1  mrg 		      break;
1.1  mrg 		    else if (!quote_first && !quote_eight)
1.1  mrg 		      break;
1.1  mrg 		  }
1.1  mrg
1.1  mrg 		if (maybe_number_start)
1.1  mrg 		  {
1.1  mrg 		    if (c == '\'')
1.1  mrg 		      /* A CPP NUMBER.  */
1.1  mrg 		      goto dflt;
1.1  mrg 		    raw = false;
1.1  mrg 		  }
1.1  mrg
1.1  mrg 		goto delimited_string;
1.1  mrg 	      }
1.1  mrg
1.1  mrg 	    delimited_string:
1.1  mrg 	      {
1.1  mrg 		/* (Possibly raw) string or char literal.  */
1.1  mrg 		unsigned char end = c;
1.1  mrg 		int delim_len = -1;
1.1  mrg 		const unsigned char *delim = NULL;
1.1  mrg 		location_t sloc = linemap_position_for_column (pfile->line_table,
1.1  mrg 							       pos - line_start);
1.1  mrg 		int esc = 0;
1.1  mrg
1.1  mrg 		if (raw)
1.1  mrg 		  {
1.1  mrg 		    /* There can be no line breaks in the delimiter.  */
1.1  mrg 		    delim = pos;
1.1  mrg 		    for (delim_len = 0; (c = *pos++) != '('; delim_len++)
1.1  mrg 		      {
1.1  mrg 			if (delim_len == 16)
1.1  mrg 			  {
1.1  mrg 			    cpp_error_with_line (pfile, CPP_DL_ERROR,
1.1  mrg 						 sloc, 0,
1.1  mrg 						 "raw string delimiter"
1.1  mrg 						 " longer than %d"
1.1  mrg 						 " characters",
1.1  mrg 						 delim_len);
1.1  mrg 			    raw = false;
1.1  mrg 			    pos = delim;
1.1  mrg 			    break;
1.1  mrg 			  }
1.1  mrg 			if (strchr (") \\\t\v\f\n", c))
1.1  mrg 			  {
1.1  mrg 			    cpp_error_with_line (pfile, CPP_DL_ERROR,
1.1  mrg 						 sloc, 0,
1.1  mrg 						 "invalid character '%c'"
1.1  mrg 						 " in raw string"
1.1  mrg 						 " delimiter", c);
1.1  mrg 			    raw = false;
1.1  mrg 			    pos = delim;
1.1  mrg 			    break;
1.1  mrg 			  }
1.1  mrg 			if (pos >= limit)
1.1  mrg 			  goto bad_string;
1.1  mrg 		      }
1.1  mrg 		  }
1.1  mrg
1.1  mrg 		while (pos < limit)
1.1  mrg 		  {
1.1  mrg 		    char c = *pos++;
1.1  mrg 		    switch (c)
1.1  mrg 		      {
1.1  mrg 		      case '\\':
1.1  mrg 			if (!raw)
1.1  mrg 			  esc++;
1.1  mrg 			break;
1.1  mrg
1.1  mrg 		      case '\r':
1.1  mrg 			if (*pos == '\n')
1.1  mrg 			  pos++;
1.1  mrg 			/* FALLTHROUGH  */
1.1  mrg
1.1  mrg 		      case '\n':
1.1  mrg 			{
1.1  mrg 			  CPP_INCREMENT_LINE (pfile, 0);
1.1  mrg 			  line_count++;
1.1  mrg 			  line_start = pos;
1.1  mrg 			}
1.1  mrg 			if (esc)
1.1  mrg 			  esc--;
1.1  mrg 			break;
1.1  mrg
1.1  mrg 		      case ')':
1.1  mrg 			if (raw
1.1  mrg 			    && pos + delim_len + 1 < limit
1.1  mrg 			    && pos[delim_len] == end
1.1  mrg 			    && !memcmp (delim, pos, delim_len))
1.1  mrg 			  {
1.1  mrg 			    pos += delim_len + 1;
1.1  mrg 			    raw = false;
1.1  mrg 			    goto done_string;
1.1  mrg 			  }
1.1  mrg 			break;
1.1  mrg
1.1  mrg 		      default:
1.1  mrg 			if (!raw && !(esc & 1) && c == end)
1.1  mrg 			  goto done_string;
1.1  mrg 			esc = 0;
1.1  mrg 			break;
1.1  mrg 		      }
1.1  mrg 		  }
1.1  mrg 	      bad_string:
1.1  mrg 		cpp_error_with_line (pfile, CPP_DL_ERROR, sloc, 0,
1.1  mrg 				     "unterminated literal");
1.1  mrg
1.1  mrg 	      done_string:
1.1  mrg 		raw = false;
1.1  mrg 		lwm = pos - 1;
1.1  mrg 	      }
1.1  mrg 	      goto dflt;
1.1  mrg
1.1  mrg 	    case '_':
1.1  mrg 	    case 'e':
1.1  mrg 	    case 'i':
1.1  mrg 	    case 'm':
1.1  mrg 	      if (bol && module_p && !pfile->state.skipping
1.1  mrg 		  && do_peek_module (pfile, c, pos, limit))
1.1  mrg 		{
1.1  mrg 		  /* We've seen the start of a module control line.
1.1  mrg 		     Start up the tokenizer.  */
1.1  mrg 		  pos--; /* Backup over the first character.  */
1.1  mrg
1.1  mrg 		  /* Backup over whitespace to start of line.  */
1.1  mrg 		  while (pos > line_start
1.1  mrg 			 && (pos[-1] == ' ' || pos[-1] == '\t'))
1.1  mrg 		    pos--;
1.1  mrg
1.1  mrg 		  if (pos > base)
1.1  mrg 		    cb (pfile, CPP_DO_print, data, line_count, base, pos - base);
1.1  mrg
1.1  mrg 		  /* Prep things for directive handling. */
1.1  mrg 		  buffer->next_line = pos;
1.1  mrg 		  buffer->need_line = true;
1.1  mrg
1.1  mrg 		  /* Now get tokens until the PRAGMA_EOL.  */
1.1  mrg 		  do
1.1  mrg 		    {
1.1  mrg 		      location_t spelling;
1.1  mrg 		      const cpp_token *tok
1.1  mrg 			= cpp_get_token_with_location (pfile, &spelling);
1.1  mrg
1.1  mrg 		      gcc_assert (pfile->state.in_deferred_pragma
1.1  mrg 				  || tok->type == CPP_PRAGMA_EOL);
1.1  mrg 		      cb (pfile, CPP_DO_token, data, tok, spelling);
1.1  mrg 		    }
1.1  mrg 		  while (pfile->state.in_deferred_pragma);
1.1  mrg
1.1  mrg 		  if (pfile->buffer->next_line < pfile->buffer->rlimit)
1.1  mrg 		    cb (pfile, CPP_DO_location, data,
1.1  mrg 			pfile->line_table->highest_line);
1.1  mrg
1.1  mrg 		  pfile->mi_valid = false;
1.1  mrg 		  goto restart;
1.1  mrg 		}
1.1  mrg 	      goto dflt;
1.1  mrg
1.1  mrg 	    default:
1.1  mrg 	    dflt:
1.1  mrg 	      bol = false;
1.1  mrg 	      pfile->mi_valid = false;
1.1  mrg 	      break;
1.1  mrg 	    }
1.1  mrg 	}
1.1  mrg
1.1  mrg       if (buffer->rlimit > base && !pfile->state.skipping)
1.1  mrg 	{
1.1  mrg 	  const unsigned char *limit = buffer->rlimit;
1.1  mrg 	  /* If the file was not newline terminated, add rlimit, which is
1.1  mrg 	     guaranteed to point to a newline, to the end of our range.  */
1.1  mrg 	  if (limit[-1] != '\n')
1.1  mrg 	    {
1.1  mrg 	      limit++;
1.1  mrg 	      CPP_INCREMENT_LINE (pfile, 0);
1.1  mrg 	      line_count++;
1.1  mrg 	    }
1.1  mrg 	  cb (pfile, CPP_DO_print, data, line_count, base, limit - base);
1.1  mrg 	}
1.1  mrg
1.1  mrg       _cpp_pop_buffer (pfile);
1.1  mrg     }
1.1  mrg   while (pfile->buffer);
         }