Home | History | Annotate | Line # | Download | only in src
      1 /* xgettext Tcl backend.
      2    Copyright (C) 2002-2003, 2005-2006 Free Software Foundation, Inc.
      3 
      4    This file was written by Bruno Haible <haible (at) clisp.cons.org>, 2002.
      5 
      6    This program is free software; you can redistribute it and/or modify
      7    it under the terms of the GNU General Public License as published by
      8    the Free Software Foundation; either version 2, or (at your option)
      9    any later version.
     10 
     11    This program is distributed in the hope that it will be useful,
     12    but WITHOUT ANY WARRANTY; without even the implied warranty of
     13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14    GNU General Public License for more details.
     15 
     16    You should have received a copy of the GNU General Public License
     17    along with this program; if not, write to the Free Software Foundation,
     18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
     19 
     20 #ifdef HAVE_CONFIG_H
     21 # include "config.h"
     22 #endif
     23 
     24 #include <assert.h>
     25 #include <errno.h>
     26 #include <limits.h>
     27 #include <stdbool.h>
     28 #include <stdio.h>
     29 #include <stdlib.h>
     30 #include <string.h>
     31 
     32 #include "message.h"
     33 #include "xgettext.h"
     34 #include "x-tcl.h"
     35 #include "error.h"
     36 #include "xalloc.h"
     37 #include "exit.h"
     38 #include "hash.h"
     39 #include "c-ctype.h"
     40 #include "po-charset.h"
     41 #include "ucs4-utf8.h"
     42 #include "gettext.h"
     43 
     44 #define _(s) gettext(s)
     45 
     46 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
     47 
     48 
     49 /* The Tcl syntax is defined in the Tcl.n manual page.
     50    Summary of Tcl syntax:
     51    Like sh syntax, except that `...` is replaced with [...]. In detail:
     52    - In a preprocessing pass, backslash-newline-anywhitespace is replaced
     53      with single space.
     54    - Input is broken into words, which are then subject to command
     55      substitution [...] , variable substitution $var, backslash substitution
     56      \escape.
     57    - Strings are enclosed in "..."; command substitution, variable
     58      substitution and backslash substitutions are performed here as well.
     59    - {...} is a string without substitutions.
     60    - The list of resulting words is split into commands by semicolon and
     61      newline.
     62    - '#' at the beginning of a command introduces a comment until end of line.
     63    The parser is implemented in tcl8.3.3/generic/tclParse.c.  */
     64 
     65 
     66 /* ====================== Keyword set customization.  ====================== */
     67 
     68 /* If true extract all strings.  */
     69 static bool extract_all = false;
     70 
     71 static hash_table keywords;
     72 static bool default_keywords = true;
     73 
     74 
     75 void
     76 x_tcl_extract_all ()
     77 {
     78   extract_all = true;
     79 }
     80 
     81 
     82 void
     83 x_tcl_keyword (const char *name)
     84 {
     85   if (name == NULL)
     86     default_keywords = false;
     87   else
     88     {
     89       const char *end;
     90       struct callshape shape;
     91 
     92       if (keywords.table == NULL)
     93 	hash_init (&keywords, 100);
     94 
     95       split_keywordspec (name, &end, &shape);
     96 
     97       /* The characters between name and end should form a valid Tcl
     98 	 function name.  A leading "::" is redundant.  */
     99       if (end - name >= 2 && name[0] == ':' && name[1] == ':')
    100 	name += 2;
    101 
    102       insert_keyword_callshape (&keywords, name, end - name, &shape);
    103     }
    104 }
    105 
    106 /* Finish initializing the keywords hash table.
    107    Called after argument processing, before each file is processed.  */
    108 static void
    109 init_keywords ()
    110 {
    111   if (default_keywords)
    112     {
    113       /* When adding new keywords here, also update the documentation in
    114 	 xgettext.texi!  */
    115       x_tcl_keyword ("::msgcat::mc");
    116       default_keywords = false;
    117     }
    118 }
    119 
    120 void
    121 init_flag_table_tcl ()
    122 {
    123   xgettext_record_flag ("::msgcat::mc:1:pass-tcl-format");
    124   xgettext_record_flag ("format:1:tcl-format");
    125 }
    126 
    127 
    128 /* ======================== Reading of characters.  ======================== */
    129 
    130 /* Real filename, used in error messages about the input file.  */
    131 static const char *real_file_name;
    132 
    133 /* Logical filename and line number, used to label the extracted messages.  */
    134 static char *logical_file_name;
    135 static int line_number;
    136 
    137 /* The input file stream.  */
    138 static FILE *fp;
    139 
    140 
    141 /* Fetch the next character from the input file.  */
    142 static int
    143 do_getc ()
    144 {
    145   int c = getc (fp);
    146 
    147   if (c == EOF)
    148     {
    149       if (ferror (fp))
    150 	error (EXIT_FAILURE, errno, _("\
    151 error while reading \"%s\""), real_file_name);
    152     }
    153   else if (c == '\n')
    154    line_number++;
    155 
    156   return c;
    157 }
    158 
    159 /* Put back the last fetched character, not EOF.  */
    160 static void
    161 do_ungetc (int c)
    162 {
    163   if (c == '\n')
    164     line_number--;
    165   ungetc (c, fp);
    166 }
    167 
    168 
    169 /* Combine backslash followed by newline and additional whitespace to
    170    a single space.  */
    171 
    172 /* An int that becomes a space when casted to 'unsigned char'.  */
    173 #define BS_NL (UCHAR_MAX + 1 + ' ')
    174 
    175 static int phase1_pushback[1];
    176 static int phase1_pushback_length;
    177 
    178 static int
    179 phase1_getc ()
    180 {
    181   int c;
    182 
    183   if (phase1_pushback_length)
    184     {
    185       c = phase1_pushback[--phase1_pushback_length];
    186       if (c == '\n' || c == BS_NL)
    187 	++line_number;
    188       return c;
    189     }
    190   c = do_getc ();
    191   if (c != '\\')
    192     return c;
    193   c = do_getc ();
    194   if (c != '\n')
    195     {
    196       if (c != EOF)
    197 	do_ungetc (c);
    198       return '\\';
    199     }
    200   for (;;)
    201     {
    202       c = do_getc ();
    203       if (!(c == ' ' || c == '\t'))
    204 	break;
    205     }
    206   if (c != EOF)
    207     do_ungetc (c);
    208   return BS_NL;
    209 }
    210 
    211 /* Supports only one pushback character.  */
    212 static void
    213 phase1_ungetc (int c)
    214 {
    215   switch (c)
    216     {
    217     case EOF:
    218       break;
    219 
    220     case '\n':
    221     case BS_NL:
    222       --line_number;
    223       /* FALLTHROUGH */
    224 
    225     default:
    226       if (phase1_pushback_length == SIZEOF (phase1_pushback))
    227 	abort ();
    228       phase1_pushback[phase1_pushback_length++] = c;
    229       break;
    230     }
    231 }
    232 
    233 
    234 /* Keep track of brace nesting depth.
    235    When a word starts with an opening brace, a character group begins that
    236    ends with the corresponding closing brace.  In theory these character
    237    groups are string literals, but they are used by so many Tcl primitives
    238    (proc, if, ...) as representing command lists, that we treat them as
    239    command lists.  */
    240 
    241 /* An int that becomes a closing brace when casted to 'unsigned char'.  */
    242 #define CL_BRACE (UCHAR_MAX + 1 + '}')
    243 
    244 static int phase2_pushback[2];
    245 static int phase2_pushback_length;
    246 
    247 /* Brace nesting depth inside the current character group.  */
    248 static int brace_depth;
    249 
    250 static int
    251 phase2_push ()
    252 {
    253   int previous_depth = brace_depth;
    254   brace_depth = 1;
    255   return previous_depth;
    256 }
    257 
    258 static void
    259 phase2_pop (int previous_depth)
    260 {
    261   brace_depth = previous_depth;
    262 }
    263 
    264 static int
    265 phase2_getc ()
    266 {
    267   int c;
    268 
    269   if (phase2_pushback_length)
    270     {
    271       c = phase2_pushback[--phase2_pushback_length];
    272       if (c == '\n' || c == BS_NL)
    273 	++line_number;
    274       else if (c == '{')
    275 	++brace_depth;
    276       else if (c == '}')
    277 	--brace_depth;
    278       return c;
    279     }
    280   c = phase1_getc ();
    281   if (c == '{')
    282     ++brace_depth;
    283   else if (c == '}')
    284     {
    285       if (--brace_depth == 0)
    286 	c = CL_BRACE;
    287     }
    288   return c;
    289 }
    290 
    291 /* Supports 2 characters of pushback.  */
    292 static void
    293 phase2_ungetc (int c)
    294 {
    295   if (c != EOF)
    296     {
    297       switch (c)
    298 	{
    299 	case '\n':
    300 	case BS_NL:
    301 	  --line_number;
    302 	  break;
    303 
    304 	case '{':
    305 	  --brace_depth;
    306 	  break;
    307 
    308 	case '}':
    309 	  ++brace_depth;
    310 	  break;
    311 	}
    312       if (phase2_pushback_length == SIZEOF (phase2_pushback))
    313 	abort ();
    314       phase2_pushback[phase2_pushback_length++] = c;
    315     }
    316 }
    317 
    318 
    319 /* ========================== Reading of tokens.  ========================== */
    320 
    321 
    322 /* A token consists of a sequence of characters.  */
    323 struct token
    324 {
    325   int allocated;		/* number of allocated 'token_char's */
    326   int charcount;		/* number of used 'token_char's */
    327   char *chars;			/* the token's constituents */
    328 };
    329 
    330 /* Initialize a 'struct token'.  */
    331 static inline void
    332 init_token (struct token *tp)
    333 {
    334   tp->allocated = 10;
    335   tp->chars = (char *) xmalloc (tp->allocated * sizeof (char));
    336   tp->charcount = 0;
    337 }
    338 
    339 /* Free the memory pointed to by a 'struct token'.  */
    340 static inline void
    341 free_token (struct token *tp)
    342 {
    343   free (tp->chars);
    344 }
    345 
    346 /* Ensure there is enough room in the token for one more character.  */
    347 static inline void
    348 grow_token (struct token *tp)
    349 {
    350   if (tp->charcount == tp->allocated)
    351     {
    352       tp->allocated *= 2;
    353       tp->chars = (char *) xrealloc (tp->chars, tp->allocated * sizeof (char));
    354     }
    355 }
    356 
    357 
    358 /* ========================= Accumulating comments ========================= */
    359 
    360 
    361 static char *buffer;
    362 static size_t bufmax;
    363 static size_t buflen;
    364 
    365 static inline void
    366 comment_start ()
    367 {
    368   buflen = 0;
    369 }
    370 
    371 static inline void
    372 comment_add (int c)
    373 {
    374   if (buflen >= bufmax)
    375     {
    376       bufmax = 2 * bufmax + 10;
    377       buffer = xrealloc (buffer, bufmax);
    378     }
    379   buffer[buflen++] = c;
    380 }
    381 
    382 static inline void
    383 comment_line_end ()
    384 {
    385   while (buflen >= 1
    386 	 && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
    387     --buflen;
    388   if (buflen >= bufmax)
    389     {
    390       bufmax = 2 * bufmax + 10;
    391       buffer = xrealloc (buffer, bufmax);
    392     }
    393   buffer[buflen] = '\0';
    394   savable_comment_add (buffer);
    395 }
    396 
    397 
    398 /* These are for tracking whether comments count as immediately before
    399    keyword.  */
    400 static int last_comment_line;
    401 static int last_non_comment_line;
    402 
    403 
    404 /* ========================= Accumulating messages ========================= */
    405 
    406 
    407 static message_list_ty *mlp;
    408 
    409 
    410 /* ========================== Reading of commands ========================== */
    411 
    412 
    413 /* We are only interested in constant strings (e.g. "msgcat::mc" or other
    414    string literals).  Other words need not to be represented precisely.  */
    415 enum word_type
    416 {
    417   t_string,	/* constant string */
    418   t_other,	/* other string */
    419   t_separator,	/* command separator: semicolon or newline */
    420   t_bracket,	/* ']' pseudo word */
    421   t_brace,	/* '}' pseudo word */
    422   t_eof		/* EOF marker */
    423 };
    424 
    425 struct word
    426 {
    427   enum word_type type;
    428   struct token *token;		/* for t_string */
    429   int line_number_at_start;	/* for t_string */
    430 };
    431 
    432 /* Free the memory pointed to by a 'struct word'.  */
    433 static inline void
    434 free_word (struct word *wp)
    435 {
    436   if (wp->type == t_string)
    437     {
    438       free_token (wp->token);
    439       free (wp->token);
    440     }
    441 }
    442 
    443 /* Convert a t_string token to a char*.  */
    444 static char *
    445 string_of_word (const struct word *wp)
    446 {
    447   char *str;
    448   int n;
    449 
    450   if (!(wp->type == t_string))
    451     abort ();
    452   n = wp->token->charcount;
    453   str = (char *) xmalloc (n + 1);
    454   memcpy (str, wp->token->chars, n);
    455   str[n] = '\0';
    456   return str;
    457 }
    458 
    459 
    460 /* Context lookup table.  */
    461 static flag_context_list_table_ty *flag_context_list_table;
    462 
    463 
    464 /* Read an escape sequence.  The value is an ISO-8859-1 character (in the
    465    range 0x00..0xff) or a Unicode character (in the range 0x0000..0xffff).  */
    466 static int
    467 do_getc_escaped ()
    468 {
    469   int c;
    470 
    471   c = phase1_getc ();
    472   switch (c)
    473     {
    474     case EOF:
    475       return '\\';
    476     case 'a':
    477       return '\a';
    478     case 'b':
    479       return '\b';
    480     case 'f':
    481       return '\f';
    482     case 'n':
    483       return '\n';
    484     case 'r':
    485       return '\r';
    486     case 't':
    487       return '\t';
    488     case 'v':
    489       return '\v';
    490     case 'x':
    491       {
    492 	int n = 0;
    493 	unsigned int i;
    494 
    495 	for (i = 0;; i++)
    496 	  {
    497 	    c = phase1_getc ();
    498 	    if (c == EOF || !c_isxdigit ((unsigned char) c))
    499 	      break;
    500 
    501 	    if (c >= '0' && c <= '9')
    502 	      n = (n << 4) + (c - '0');
    503 	    else if (c >= 'A' && c <= 'F')
    504 	      n = (n << 4) + (c - 'A' + 10);
    505 	    else if (c >= 'a' && c <= 'f')
    506 	      n = (n << 4) + (c - 'a' + 10);
    507 	  }
    508 	phase1_ungetc (c);
    509 	return (i > 0 ? (unsigned char) n : 'x');
    510       }
    511     case 'u':
    512       {
    513 	int n = 0;
    514 	unsigned int i;
    515 
    516 	for (i = 0; i < 4; i++)
    517 	  {
    518 	    c = phase1_getc ();
    519 	    if (c == EOF || !c_isxdigit ((unsigned char) c))
    520 	      break;
    521 
    522 	    if (c >= '0' && c <= '9')
    523 	      n = (n << 4) + (c - '0');
    524 	    else if (c >= 'A' && c <= 'F')
    525 	      n = (n << 4) + (c - 'A' + 10);
    526 	    else if (c >= 'a' && c <= 'f')
    527 	      n = (n << 4) + (c - 'a' + 10);
    528 	  }
    529 	phase1_ungetc (c);
    530 	return (i > 0 ? n : 'u');
    531       }
    532     case '0': case '1': case '2': case '3': case '4':
    533     case '5': case '6': case '7':
    534       {
    535 	int n = c - '0';
    536 
    537 	c = phase1_getc ();
    538 	if (c != EOF)
    539 	  {
    540 	    if (c >= '0' && c <= '7')
    541 	      {
    542 		n = (n << 3) + (c - '0');
    543 		c = phase1_getc ();
    544 		if (c != EOF)
    545 		  {
    546 		    if (c >= '0' && c <= '7')
    547 		      n = (n << 3) + (c - '0');
    548 		    else
    549 		      phase1_ungetc (c);
    550 		  }
    551 	      }
    552 	    else
    553 	      phase1_ungetc (c);
    554 	  }
    555 	return (unsigned char) n;
    556       }
    557     default:
    558       /* Note: If c is non-ASCII, Tcl's behaviour is undefined here.  */
    559       return (unsigned char) c;
    560     }
    561 }
    562 
    563 
    564 enum terminator
    565 {
    566   te_space_separator,		/* looking for space semicolon newline */
    567   te_space_separator_bracket,	/* looking for space semicolon newline ']' */
    568   te_paren,			/* looking for ')' */
    569   te_quote			/* looking for '"' */
    570 };
    571 
    572 /* Forward declaration of local functions.  */
    573 static enum word_type read_command_list (int looking_for,
    574 					 flag_context_ty outer_context);
    575 
    576 /* Accumulate tokens into the given word.
    577    'looking_for' denotes a parse terminator combination.
    578    Return the first character past the token.  */
    579 static int
    580 accumulate_word (struct word *wp, enum terminator looking_for,
    581 		 flag_context_ty context)
    582 {
    583   int c;
    584 
    585   for (;;)
    586     {
    587       c = phase2_getc ();
    588 
    589       if (c == EOF || c == CL_BRACE)
    590 	return c;
    591       if ((looking_for == te_space_separator
    592 	   || looking_for == te_space_separator_bracket)
    593 	  && (c == ' ' || c == BS_NL
    594 	      || c == '\t' || c == '\v' || c == '\f' || c == '\r'
    595 	      || c == ';' || c == '\n'))
    596 	return c;
    597       if (looking_for == te_space_separator_bracket && c == ']')
    598 	return c;
    599       if (looking_for == te_paren && c == ')')
    600 	return c;
    601       if (looking_for == te_quote && c == '"')
    602 	return c;
    603 
    604       if (c == '$')
    605 	{
    606 	  /* Distinguish $varname, ${varname} and lone $.  */
    607 	  c = phase2_getc ();
    608 	  if (c == '{')
    609 	    {
    610 	      /* ${varname} */
    611 	      do
    612 		c = phase2_getc ();
    613 	      while (c != EOF && c != '}');
    614 	      wp->type = t_other;
    615 	    }
    616 	  else
    617 	    {
    618 	      bool nonempty = false;
    619 
    620 	      for (; c != EOF && c != CL_BRACE; c = phase2_getc ())
    621 		{
    622 		  if (c_isalnum ((unsigned char) c) || (c == '_'))
    623 		    {
    624 		      nonempty = true;
    625 		      continue;
    626 		    }
    627 		  if (c == ':')
    628 		    {
    629 		      c = phase2_getc ();
    630 		      if (c == ':')
    631 			{
    632 			  do
    633 			    c = phase2_getc ();
    634 			  while (c == ':');
    635 
    636 			  phase2_ungetc (c);
    637 			  nonempty = true;
    638 			  continue;
    639 			}
    640 		      phase2_ungetc (c);
    641 		      c = ':';
    642 		    }
    643 		  break;
    644 		}
    645 	      if (c == '(')
    646 		{
    647 		  /* $varname(index) */
    648 		  struct word index_word;
    649 
    650 		  index_word.type = t_other;
    651 		  c = accumulate_word (&index_word, te_paren, null_context);
    652 		  if (c != EOF && c != ')')
    653 		    phase2_ungetc (c);
    654 		  wp->type = t_other;
    655 		}
    656 	      else
    657 		{
    658 		  phase2_ungetc (c);
    659 		  if (nonempty)
    660 		    {
    661 		      /* $varname */
    662 		      wp->type = t_other;
    663 		    }
    664 		  else
    665 		    {
    666 		      /* lone $ */
    667 		      if (wp->type == t_string)
    668 			{
    669 			  grow_token (wp->token);
    670 			  wp->token->chars[wp->token->charcount++] = '$';
    671 			}
    672 		    }
    673 		}
    674 	    }
    675 	}
    676       else if (c == '[')
    677 	{
    678 	  read_command_list (']', context);
    679 	  wp->type = t_other;
    680 	}
    681       else if (c == '\\')
    682 	{
    683 	  unsigned int uc;
    684 	  unsigned char utf8buf[6];
    685 	  int count;
    686 	  int i;
    687 
    688 	  uc = do_getc_escaped ();
    689 	  assert (uc < 0x10000);
    690 	  count = u8_uctomb (utf8buf, uc, 6);
    691 	  assert (count > 0);
    692 	  if (wp->type == t_string)
    693 	    for (i = 0; i < count; i++)
    694 	      {
    695 		grow_token (wp->token);
    696 		wp->token->chars[wp->token->charcount++] = utf8buf[i];
    697 	      }
    698 	}
    699       else
    700 	{
    701 	  if (wp->type == t_string)
    702 	    {
    703 	      grow_token (wp->token);
    704 	      wp->token->chars[wp->token->charcount++] = (unsigned char) c;
    705 	    }
    706 	}
    707     }
    708 }
    709 
    710 
    711 /* Read the next word.
    712    'looking_for' denotes a parse terminator, either ']' or '\0'.  */
    713 static void
    714 read_word (struct word *wp, int looking_for, flag_context_ty context)
    715 {
    716   int c;
    717 
    718   do
    719     c = phase2_getc ();
    720   while (c == ' ' || c == BS_NL
    721 	 || c == '\t' || c == '\v' || c == '\f' || c == '\r');
    722 
    723   if (c == EOF)
    724     {
    725       wp->type = t_eof;
    726       return;
    727     }
    728 
    729   if (c == CL_BRACE)
    730     {
    731       wp->type = t_brace;
    732       last_non_comment_line = line_number;
    733       return;
    734     }
    735 
    736   if (c == '\n')
    737     {
    738       /* Comments assumed to be grouped with a message must immediately
    739 	 precede it, with no non-whitespace token on a line between both.  */
    740       if (last_non_comment_line > last_comment_line)
    741 	savable_comment_reset ();
    742       wp->type = t_separator;
    743       return;
    744     }
    745 
    746   if (c == ';')
    747     {
    748       wp->type = t_separator;
    749       last_non_comment_line = line_number;
    750       return;
    751     }
    752 
    753   if (looking_for == ']' && c == ']')
    754     {
    755       wp->type = t_bracket;
    756       last_non_comment_line = line_number;
    757       return;
    758     }
    759 
    760   if (c == '{')
    761     {
    762       int previous_depth;
    763       enum word_type terminator;
    764 
    765       /* Start a new nested character group, which lasts until the next
    766 	 balanced '}' (ignoring \} things).  */
    767       previous_depth = phase2_push () - 1;
    768 
    769       /* Interpret it as a command list.  */
    770       terminator = read_command_list ('\0', null_context);
    771 
    772       if (terminator == t_brace)
    773 	phase2_pop (previous_depth);
    774 
    775       wp->type = t_other;
    776       last_non_comment_line = line_number;
    777       return;
    778     }
    779 
    780   wp->type = t_string;
    781   wp->token = (struct token *) xmalloc (sizeof (struct token));
    782   init_token (wp->token);
    783   wp->line_number_at_start = line_number;
    784 
    785   if (c == '"')
    786     {
    787       c = accumulate_word (wp, te_quote, context);
    788       if (c != EOF && c != '"')
    789 	phase2_ungetc (c);
    790     }
    791   else
    792     {
    793       phase2_ungetc (c);
    794       c = accumulate_word (wp,
    795 			   looking_for == ']'
    796 			   ? te_space_separator_bracket
    797 			   : te_space_separator,
    798 			   context);
    799       if (c != EOF)
    800 	phase2_ungetc (c);
    801     }
    802 
    803   if (wp->type != t_string)
    804     {
    805       free_token (wp->token);
    806       free (wp->token);
    807     }
    808   last_non_comment_line = line_number;
    809 }
    810 
    811 
    812 /* Read the next command.
    813    'looking_for' denotes a parse terminator, either ']' or '\0'.
    814    Returns the type of the word that terminated the command: t_separator or
    815    t_bracket (only if looking_for is ']') or t_brace or t_eof.  */
    816 static enum word_type
    817 read_command (int looking_for, flag_context_ty outer_context)
    818 {
    819   int c;
    820 
    821   /* Skip whitespace and comments.  */
    822   for (;;)
    823     {
    824       c = phase2_getc ();
    825 
    826       if (c == ' ' || c == BS_NL
    827 	  || c == '\t' || c == '\v' || c == '\f' || c == '\r')
    828 	continue;
    829       if (c == '#')
    830 	{
    831 	  /* Skip a comment up to end of line.  */
    832 	  last_comment_line = line_number;
    833 	  comment_start ();
    834 	  for (;;)
    835 	    {
    836 	      c = phase2_getc ();
    837 	      if (c == EOF || c == CL_BRACE || c == '\n')
    838 		break;
    839 	      /* We skip all leading white space, but not EOLs.  */
    840 	      if (!(buflen == 0 && (c == ' ' || c == '\t')))
    841 		comment_add (c);
    842 	    }
    843 	  comment_line_end ();
    844 	  continue;
    845 	}
    846       break;
    847     }
    848   phase2_ungetc (c);
    849 
    850   /* Read the words that make up the command.  */
    851   {
    852     int arg = 0;		/* Current argument number.  */
    853     flag_context_list_iterator_ty context_iter;
    854     const struct callshapes *shapes = NULL;
    855     struct arglist_parser *argparser = NULL;
    856 
    857     for (;; arg++)
    858       {
    859 	struct word inner;
    860 	flag_context_ty inner_context;
    861 
    862 	if (arg == 0)
    863 	  inner_context = null_context;
    864 	else
    865 	  inner_context =
    866 	    inherited_context (outer_context,
    867 			       flag_context_list_iterator_advance (
    868 				 &context_iter));
    869 
    870 	read_word (&inner, looking_for, inner_context);
    871 
    872 	/* Recognize end of command.  */
    873 	if (inner.type == t_separator || inner.type == t_bracket
    874 	    || inner.type == t_brace || inner.type == t_eof)
    875 	  {
    876 	    if (argparser != NULL)
    877 	      arglist_parser_done (argparser, arg);
    878 	    return inner.type;
    879 	  }
    880 
    881 	if (extract_all)
    882 	  {
    883 	    if (inner.type == t_string)
    884 	      {
    885 		lex_pos_ty pos;
    886 
    887 		pos.file_name = logical_file_name;
    888 		pos.line_number = inner.line_number_at_start;
    889 		remember_a_message (mlp, NULL, string_of_word (&inner),
    890 				    inner_context, &pos, savable_comment);
    891 	      }
    892 	  }
    893 
    894 	if (arg == 0)
    895 	  {
    896 	    /* This is the function position.  */
    897 	    if (inner.type == t_string)
    898 	      {
    899 		char *function_name = string_of_word (&inner);
    900 		char *stripped_name;
    901 		void *keyword_value;
    902 
    903 		/* A leading "::" is redundant.  */
    904 		stripped_name = function_name;
    905 		if (function_name[0] == ':' && function_name[1] == ':')
    906 		  stripped_name += 2;
    907 
    908 		if (hash_find_entry (&keywords,
    909 				     stripped_name, strlen (stripped_name),
    910 				     &keyword_value)
    911 		    == 0)
    912 		  shapes = (const struct callshapes *) keyword_value;
    913 
    914 		argparser = arglist_parser_alloc (mlp, shapes);
    915 
    916 		context_iter =
    917 		  flag_context_list_iterator (
    918 		    flag_context_list_table_lookup (
    919 		      flag_context_list_table,
    920 		      stripped_name, strlen (stripped_name)));
    921 
    922 		free (function_name);
    923 	      }
    924 	    else
    925 	      context_iter = null_context_list_iterator;
    926 	  }
    927 	else
    928 	  {
    929 	    /* These are the argument positions.  */
    930 	    if (argparser != NULL && inner.type == t_string)
    931 	      arglist_parser_remember (argparser, arg,
    932 				       string_of_word (&inner),
    933 				       inner_context,
    934 				       logical_file_name,
    935 				       inner.line_number_at_start,
    936 				       savable_comment);
    937 	  }
    938 
    939 	free_word (&inner);
    940       }
    941   }
    942 }
    943 
    944 
    945 /* Read a list of commands.
    946    'looking_for' denotes a parse terminator, either ']' or '\0'.
    947    Returns the type of the word that terminated the command list:
    948    t_bracket (only if looking_for is ']') or t_brace or t_eof.  */
    949 static enum word_type
    950 read_command_list (int looking_for, flag_context_ty outer_context)
    951 {
    952   for (;;)
    953     {
    954       enum word_type terminator;
    955 
    956       terminator = read_command (looking_for, outer_context);
    957       if (terminator != t_separator)
    958 	return terminator;
    959     }
    960 }
    961 
    962 
    963 void
    964 extract_tcl (FILE *f,
    965 	     const char *real_filename, const char *logical_filename,
    966 	     flag_context_list_table_ty *flag_table,
    967 	     msgdomain_list_ty *mdlp)
    968 {
    969   mlp = mdlp->item[0]->messages;
    970 
    971   /* We convert our strings to UTF-8 encoding.  */
    972   xgettext_current_source_encoding = po_charset_utf8;
    973 
    974   fp = f;
    975   real_file_name = real_filename;
    976   logical_file_name = xstrdup (logical_filename);
    977   line_number = 1;
    978 
    979   /* Initially, no brace is open.  */
    980   brace_depth = 1000000;
    981 
    982   last_comment_line = -1;
    983   last_non_comment_line = -1;
    984 
    985   flag_context_list_table = flag_table;
    986 
    987   init_keywords ();
    988 
    989   /* Eat tokens until eof is seen.  */
    990   read_command_list ('\0', null_context);
    991 
    992   fp = NULL;
    993   real_file_name = NULL;
    994   logical_file_name = NULL;
    995   line_number = 0;
    996 }
    997