Home | History | Annotate | Line # | Download | only in util
texindex.c revision 1.2.18.1
      1 /*	$NetBSD: texindex.c,v 1.2.18.1 2026/01/22 19:02:27 martin Exp $	*/
      2 
      3 /* texindex -- sort TeX index dribble output into an actual index.
      4    Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp
      5 
      6    Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
      7    2002, 2003, 2004 Free Software Foundation, Inc.
      8 
      9    This program is free software; you can redistribute it and/or modify
     10    it under the terms of the GNU General Public License as published by
     11    the Free Software Foundation; either version 2, or (at your option)
     12    any later version.
     13 
     14    This program is distributed in the hope that it will be useful,
     15    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17    GNU General Public License for more details.
     18 
     19    You should have received a copy of the GNU General Public License
     20    along with this program; if not, write to the Free Software
     21    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
     22 
     23 #include "system.h"
     24 #include <getopt.h>
     25 
     26 static char *program_name = "texindex";
     27 
     28 #if defined (emacs)
     29 #  include "../src/config.h"
     30 /* Some s/os.h files redefine these. */
     31 #  undef read
     32 #  undef close
     33 #  undef write
     34 #  undef open
     35 #endif
     36 
     37 #if !defined (HAVE_MEMSET)
     38 #undef memset
     39 #define memset(ptr, ignore, count) bzero (ptr, count)
     40 #endif
     41 
     42 #if !defined (SEEK_SET)
     43 #  define SEEK_SET 0
     44 #  define SEEK_CUR 1
     45 #  define SEEK_END 2
     46 #endif /* !SEEK_SET */
     47 
     48 /* When sorting in core, this structure describes one line
     49    and the position and length of its first keyfield.  */
     50 struct lineinfo
     51 {
     52   char *text;           /* The actual text of the line. */
     53   union {
     54     char *text;         /* The start of the key (for textual comparison). */
     55     long number;        /* The numeric value (for numeric comparison). */
     56   } key;
     57   long keylen;          /* Length of KEY field. */
     58   size_t idx;		/* tie breaker */
     59 };
     60 
     61 /* This structure describes a field to use as a sort key. */
     62 struct keyfield
     63 {
     64   int startwords;       /* Number of words to skip. */
     65   int startchars;       /* Number of additional chars to skip. */
     66   int endwords;         /* Number of words to ignore at end. */
     67   int endchars;         /* Ditto for characters of last word. */
     68   char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */
     69   char fold_case;       /* Non-zero means case doesn't matter. */
     70   char reverse;         /* Non-zero means compare in reverse order. */
     71   char numeric;         /* Non-zeros means field is ASCII numeric. */
     72   char positional;      /* Sort according to file position. */
     73   char braced;          /* Count balanced-braced groupings as fields. */
     74 };
     75 
     76 /* Vector of keyfields to use. */
     77 struct keyfield keyfields[3];
     78 
     79 /* Number of keyfields stored in that vector.  */
     80 int num_keyfields = 3;
     81 
     82 /* Vector of input file names, terminated with a null pointer. */
     83 char **infiles;
     84 
     85 /* Vector of corresponding output file names, or NULL, meaning default it
     86    (add an `s' to the end). */
     87 char **outfiles;
     88 
     89 /* Length of `infiles'. */
     90 int num_infiles;
     91 
     92 /* Pointer to the array of pointers to lines being sorted. */
     93 char **linearray;
     94 
     95 /* The allocated length of `linearray'. */
     96 long nlines;
     97 
     98 /* During in-core sort, this points to the base of the data block
     99    which contains all the lines of data.  */
    100 char *text_base;
    101 
    102 /* Initially 0; changed to 1 if we want initials in this index.  */
    103 int need_initials;
    104 
    105 /* Remembers the first initial letter seen in this index, so we can
    106    determine whether we need initials in the sorted form.  */
    107 char first_initial;
    108 
    109 /* Forward declarations of functions in this file. */
    110 void decode_command (int argc, char **argv);
    111 void sort_in_core (char *infile, int total, char *outfile);
    112 char **parsefile (char *filename, char **nextline, char *data, long int size);
    113 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
    114 char *find_pos (char *str, int words, int chars, int ignore_blanks);
    115 long find_value (char *start, long int length);
    116 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
    117 char *find_braced_end (char *str);
    118 void writelines (char **linearray, int nlines, FILE *ostream);
    119 int compare_field (struct keyfield *keyfield, char *start1,
    120                    long int length1, long int pos1, char *start2,
    121                    long int length2, long int pos2);
    122 int compare_full (const void *, const void *);
    123 void pfatal_with_name (const char *name);
    124 void fatal (const char *format, const char *arg);
    125 void error (const char *format, const char *arg);
    126 void *xmalloc (size_t), *xrealloc (void *, size_t);
    127 static char *concat3 (const char *, const char *, const char *);
    128 
    129 int
    131 main (int argc, char **argv)
    132 {
    133   int i;
    134 
    135 #ifdef HAVE_SETLOCALE
    136   /* Set locale via LC_ALL.  */
    137   setlocale (LC_ALL, "");
    138 #endif
    139 
    140   /* Set the text message domain.  */
    141   bindtextdomain (PACKAGE, LOCALEDIR);
    142   textdomain (PACKAGE);
    143 
    144   /* In case we write to a redirected stdout that fails.  */
    145   /* not ready atexit (close_stdout); */
    146 
    147   /* Describe the kind of sorting to do. */
    148   /* The first keyfield uses the first braced field and folds case. */
    149   keyfields[0].braced = 1;
    150   keyfields[0].fold_case = 1;
    151   keyfields[0].endwords = -1;
    152   keyfields[0].endchars = -1;
    153 
    154   /* The second keyfield uses the second braced field, numerically. */
    155   keyfields[1].braced = 1;
    156   keyfields[1].numeric = 1;
    157   keyfields[1].startwords = 1;
    158   keyfields[1].endwords = -1;
    159   keyfields[1].endchars = -1;
    160 
    161   /* The third keyfield (which is ignored while discarding duplicates)
    162      compares the whole line. */
    163   keyfields[2].endwords = -1;
    164   keyfields[2].endchars = -1;
    165 
    166   decode_command (argc, argv);
    167 
    168   /* Process input files completely, one by one.  */
    169 
    170   for (i = 0; i < num_infiles; i++)
    171     {
    172       int desc;
    173       off_t ptr;
    174       char *outfile;
    175       struct stat instat;
    176 
    177       desc = open (infiles[i], O_RDONLY, 0);
    178       if (desc < 0)
    179         pfatal_with_name (infiles[i]);
    180 
    181       if (stat (infiles[i], &instat))
    182         pfatal_with_name (infiles[i]);
    183       if (S_ISDIR (instat.st_mode))
    184         {
    185 #ifdef EISDIR
    186           errno = EISDIR;
    187 #endif
    188           pfatal_with_name (infiles[i]);
    189         }
    190 
    191       lseek (desc, (off_t) 0, SEEK_END);
    192       ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
    193 
    194       close (desc);
    195 
    196       outfile = outfiles[i];
    197       if (!outfile)
    198         outfile = concat3 (infiles[i], "s", "");
    199 
    200       need_initials = 0;
    201       first_initial = '\0';
    202 
    203       if (ptr != (int)ptr)
    204 	{
    205 	  fprintf (stderr, "%s: %s: file too large\n", program_name,
    206 		   infiles[i]);
    207 	  xexit (1);
    208 	}
    209       sort_in_core (infiles[i], (int)ptr, outfile);
    210     }
    211 
    212   xexit (0);
    213   return 0; /* Avoid bogus warnings.  */
    214 }
    215 
    216 typedef struct
    218 {
    219   char *long_name;
    220   char *short_name;
    221   int *variable_ref;
    222   int variable_value;
    223   char *arg_name;
    224   char *doc_string;
    225 } TEXINDEX_OPTION;
    226 
    227 TEXINDEX_OPTION texindex_options[] = {
    228   { "--help", "-h", (int *)NULL, 0, (char *)NULL,
    229       N_("display this help and exit") },
    230   { "--output", "-o", (int *)NULL, 0, "FILE",
    231       N_("send output to FILE") },
    232   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
    233       N_("display version information and exit") },
    234   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
    235 };
    236 
    237 void
    238 usage (int result_value)
    239 {
    240   register int i;
    241   FILE *f = result_value ? stderr : stdout;
    242 
    243   fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
    244   fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
    245   /* Avoid trigraph nonsense.  */
    246   fprintf (f,
    247 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
    248            '?', '?'); /* avoid trigraph in cat-id-tbl.c */
    249   fprintf (f, _("\nOptions:\n"));
    250 
    251   for (i = 0; texindex_options[i].long_name; i++)
    252     {
    253       putc (' ', f);
    254 
    255       if (texindex_options[i].short_name)
    256         fprintf (f, "%s, ", texindex_options[i].short_name);
    257 
    258       fprintf (f, "%s %s",
    259                texindex_options[i].long_name,
    260                texindex_options[i].arg_name
    261                ? texindex_options[i].arg_name : "");
    262 
    263       fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
    264     }
    265   fputs (_("\n\
    266 Email bug reports to bug-texinfo (at) gnu.org,\n\
    267 general questions and discussion to help-texinfo (at) gnu.org.\n\
    268 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
    269   fputs ("\n", f);
    270 
    271   xexit (result_value);
    272 }
    273 
    274 /* Decode the command line arguments to set the parameter variables
    275    and set up the vector of keyfields and the vector of input files. */
    276 
    277 void
    278 decode_command (int argc, char **argv)
    279 {
    280   int arg_index = 1;
    281   char **ip;
    282   char **op;
    283 
    284   /* Allocate ARGC input files, which must be enough.  */
    285 
    286   infiles = (char **) xmalloc (argc * sizeof (char *));
    287   outfiles = (char **) xmalloc (argc * sizeof (char *));
    288   ip = infiles;
    289   op = outfiles;
    290 
    291   while (arg_index < argc)
    292     {
    293       char *arg = argv[arg_index++];
    294 
    295       if (*arg == '-')
    296         {
    297           if (strcmp (arg, "--version") == 0)
    298             {
    299               printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
    300               puts ("");
    301               puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
    302               printf (_("There is NO warranty.  You may redistribute this software\n\
    303 under the terms of the GNU General Public License.\n\
    304 For more information about these matters, see the files named COPYING.\n"));
    305               xexit (0);
    306             }
    307           else if ((strcmp (arg, "--keep") == 0) ||
    308                    (strcmp (arg, "-k") == 0))
    309             {
    310 	      /* Ignore, for backward compatibility */
    311             }
    312           else if ((strcmp (arg, "--help") == 0) ||
    313                    (strcmp (arg, "-h") == 0))
    314             {
    315               usage (0);
    316             }
    317           else if ((strcmp (arg, "--output") == 0) ||
    318                    (strcmp (arg, "-o") == 0))
    319             {
    320               if (argv[arg_index] != (char *)NULL)
    321                 {
    322                   arg_index++;
    323                   if (op > outfiles)
    324                     *(op - 1) = argv[arg_index];
    325                 }
    326               else
    327                 usage (1);
    328             }
    329           else
    330             usage (1);
    331         }
    332       else
    333         {
    334           *ip++ = arg;
    335           *op++ = (char *)NULL;
    336         }
    337     }
    338 
    339   /* Record number of keyfields and terminate list of filenames. */
    340   num_infiles = ip - infiles;
    341   *ip = (char *)NULL;
    342   if (num_infiles == 0)
    343     usage (1);
    344 }
    345 
    346 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
    348 
    349 int
    350 compare_full (const void *p1, const void *p2)
    351 {
    352   char **line1 = (char **) p1;
    353   char **line2 = (char **) p2;
    354   int i;
    355 
    356   /* Compare using the first keyfield;
    357      if that does not distinguish the lines, try the second keyfield;
    358      and so on. */
    359 
    360   for (i = 0; i < num_keyfields; i++)
    361     {
    362       long length1, length2;
    363       char *start1 = find_field (&keyfields[i], *line1, &length1);
    364       char *start2 = find_field (&keyfields[i], *line2, &length2);
    365       int tem = compare_field (&keyfields[i], start1, length1,
    366                                *line1 - text_base,
    367                                start2, length2, *line2 - text_base);
    368       if (tem)
    369         {
    370           if (keyfields[i].reverse)
    371             return -tem;
    372           return tem;
    373         }
    374     }
    375 
    376   if (*line1 == *line2)
    377     abort ();
    378   return *line1 < *line2 ? -1 : 1;
    379 }
    380 
    381 /* Compare LINE1 and LINE2, described by structures
    382    in which the first keyfield is identified in advance.
    383    For positional sorting, assumes that the order of the lines in core
    384    reflects their nominal order.  */
    385 int
    386 compare_prepared (const void *p1, const void *p2)
    387 {
    388   struct lineinfo *line1 = (struct lineinfo *) p1;
    389   struct lineinfo *line2 = (struct lineinfo *) p2;
    390   int i;
    391   int tem;
    392   char *text1, *text2;
    393 
    394   /* Compare using the first keyfield, which has been found for us already. */
    395   if (keyfields->positional)
    396     {
    397       if (line1->text - text_base > line2->text - text_base)
    398         tem = 1;
    399       else
    400         tem = -1;
    401     }
    402   else if (keyfields->numeric)
    403     tem = line1->key.number - line2->key.number;
    404   else
    405     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
    406                          line2->key.text, line2->keylen, 0);
    407   if (tem)
    408     {
    409       if (keyfields->reverse)
    410         return -tem;
    411       return tem;
    412     }
    413 
    414   text1 = line1->text;
    415   text2 = line2->text;
    416 
    417   /* Compare using the second keyfield;
    418      if that does not distinguish the lines, try the third keyfield;
    419      and so on. */
    420 
    421   for (i = 1; i < num_keyfields; i++)
    422     {
    423       long length1, length2;
    424       char *start1 = find_field (&keyfields[i], text1, &length1);
    425       char *start2 = find_field (&keyfields[i], text2, &length2);
    426       int tem = compare_field (&keyfields[i], start1, length1,
    427                                text1 - text_base,
    428                                start2, length2, text2 - text_base);
    429       if (tem)
    430         {
    431           if (keyfields[i].reverse)
    432             return -tem;
    433           return tem;
    434         }
    435     }
    436 
    437   if (line1->idx == line2->idx)
    438     abort ();
    439   return line1->idx < line2->idx ? -1 : 1;
    440 }
    441 
    442 /* Like compare_full but more general.
    443    You can pass any strings, and you can say how many keyfields to use.
    444    POS1 and POS2 should indicate the nominal positional ordering of
    445    the two lines in the input.  */
    446 
    447 int
    448 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
    449 {
    450   int i;
    451 
    452   /* Compare using the first keyfield;
    453      if that does not distinguish the lines, try the second keyfield;
    454      and so on. */
    455 
    456   for (i = 0; i < use_keyfields; i++)
    457     {
    458       long length1, length2;
    459       char *start1 = find_field (&keyfields[i], str1, &length1);
    460       char *start2 = find_field (&keyfields[i], str2, &length2);
    461       int tem = compare_field (&keyfields[i], start1, length1, pos1,
    462                                start2, length2, pos2);
    463       if (tem)
    464         {
    465           if (keyfields[i].reverse)
    466             return -tem;
    467           return tem;
    468         }
    469     }
    470 
    471   return 0;                     /* Lines match exactly. */
    472 }
    473 
    474 /* Find the start and length of a field in STR according to KEYFIELD.
    475    A pointer to the starting character is returned, and the length
    476    is stored into the int that LENGTHPTR points to.  */
    477 
    478 char *
    479 find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
    480 {
    481   char *start;
    482   char *end;
    483   char *(*fun) (char*, int, int, int);
    484 
    485   if (keyfield->braced)
    486     fun = find_braced_pos;
    487   else
    488     fun = find_pos;
    489 
    490   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
    491                   keyfield->ignore_blanks);
    492   if (keyfield->endwords < 0)
    493     {
    494       if (keyfield->braced)
    495         end = find_braced_end (start);
    496       else
    497         {
    498           end = start;
    499           while (*end && *end != '\n')
    500             end++;
    501         }
    502     }
    503   else
    504     {
    505       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
    506       if (end - str < start - str)
    507         end = start;
    508     }
    509   *lengthptr = end - start;
    510   return start;
    511 }
    512 
    513 /* Return a pointer to a specified place within STR,
    514    skipping (from the beginning) WORDS words and then CHARS chars.
    515    If IGNORE_BLANKS is nonzero, we skip all blanks
    516    after finding the specified word.  */
    517 
    518 char *
    519 find_pos (char *str, int words, int chars, int ignore_blanks)
    520 {
    521   int i;
    522   char *p = str;
    523 
    524   for (i = 0; i < words; i++)
    525     {
    526       char c;
    527       /* Find next bunch of nonblanks and skip them. */
    528       while ((c = *p) == ' ' || c == '\t')
    529         p++;
    530       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
    531         p++;
    532       if (!*p || *p == '\n')
    533         return p;
    534     }
    535 
    536   while (*p == ' ' || *p == '\t')
    537     p++;
    538 
    539   for (i = 0; i < chars; i++)
    540     {
    541       if (!*p || *p == '\n')
    542         break;
    543       p++;
    544     }
    545   return p;
    546 }
    547 
    548 /* Like find_pos but assumes that each field is surrounded by braces
    549    and that braces within fields are balanced. */
    550 
    551 char *
    552 find_braced_pos (char *str, int words, int chars, int ignore_blanks)
    553 {
    554   int i;
    555   int bracelevel;
    556   char *p = str;
    557   char c;
    558 
    559   for (i = 0; i < words; i++)
    560     {
    561       bracelevel = 1;
    562       while ((c = *p++) != '{' && c != '\n' && c)
    563         /* Do nothing. */ ;
    564       if (c != '{')
    565         return p - 1;
    566       while (bracelevel)
    567         {
    568           c = *p++;
    569           if (c == '{')
    570             bracelevel++;
    571           if (c == '}')
    572             bracelevel--;
    573           if (c == 0 || c == '\n')
    574             return p - 1;
    575         }
    576     }
    577 
    578   while ((c = *p++) != '{' && c != '\n' && c)
    579     /* Do nothing. */ ;
    580 
    581   if (c != '{')
    582     return p - 1;
    583 
    584   if (ignore_blanks)
    585     while ((c = *p) == ' ' || c == '\t')
    586       p++;
    587 
    588   for (i = 0; i < chars; i++)
    589     {
    590       if (!*p || *p == '\n')
    591         break;
    592       p++;
    593     }
    594   return p;
    595 }
    596 
    597 /* Find the end of the balanced-brace field which starts at STR.
    598    The position returned is just before the closing brace. */
    599 
    600 char *
    601 find_braced_end (char *str)
    602 {
    603   int bracelevel;
    604   char *p = str;
    605   char c;
    606 
    607   bracelevel = 1;
    608   while (bracelevel)
    609     {
    610       c = *p++;
    611       if (c == '{')
    612         bracelevel++;
    613       if (c == '}')
    614         bracelevel--;
    615       if (c == 0 || c == '\n')
    616         return p - 1;
    617     }
    618   return p - 1;
    619 }
    620 
    621 long
    622 find_value (char *start, long int length)
    623 {
    624   while (length != 0L)
    625     {
    626       if (isdigit (*start))
    627         return atol (start);
    628       length--;
    629       start++;
    630     }
    631   return 0l;
    632 }
    633 
    634 /* Vector used to translate characters for comparison.
    635    This is how we make all alphanumerics follow all else,
    636    and ignore case in the first sorting.  */
    637 int char_order[256];
    638 
    639 void
    640 init_char_order (void)
    641 {
    642   int i;
    643   for (i = 1; i < 256; i++)
    644     char_order[i] = i;
    645 
    646   for (i = '0'; i <= '9'; i++)
    647     char_order[i] += 512;
    648 
    649   for (i = 'a'; i <= 'z'; i++)
    650     {
    651       char_order[i] = 512 + i;
    652       char_order[i + 'A' - 'a'] = 512 + i;
    653     }
    654 }
    655 
    656 /* Compare two fields (each specified as a start pointer and a character count)
    657    according to KEYFIELD.
    658    The sign of the value reports the relation between the fields. */
    659 
    660 int
    661 compare_field (struct keyfield *keyfield, char *start1, long int length1,
    662                long int pos1, char *start2, long int length2, long int pos2)
    663 {
    664   if (keyfields->positional)
    665     {
    666       if (pos1 > pos2)
    667         return 1;
    668       else
    669         return -1;
    670     }
    671   if (keyfield->numeric)
    672     {
    673       long value = find_value (start1, length1) - find_value (start2, length2);
    674       if (value > 0)
    675         return 1;
    676       if (value < 0)
    677         return -1;
    678       return 0;
    679     }
    680   else
    681     {
    682       char *p1 = start1;
    683       char *p2 = start2;
    684       char *e1 = start1 + length1;
    685       char *e2 = start2 + length2;
    686 
    687       while (1)
    688         {
    689           int c1, c2;
    690 
    691           if (p1 == e1)
    692             c1 = 0;
    693           else
    694             c1 = *p1++;
    695           if (p2 == e2)
    696             c2 = 0;
    697           else
    698             c2 = *p2++;
    699 
    700           if (char_order[c1] != char_order[c2])
    701             return char_order[c1] - char_order[c2];
    702           if (!c1)
    703             break;
    704         }
    705 
    706       /* Strings are equal except possibly for case.  */
    707       p1 = start1;
    708       p2 = start2;
    709       while (1)
    710         {
    711           int c1, c2;
    712 
    713           if (p1 == e1)
    714             c1 = 0;
    715           else
    716             c1 = *p1++;
    717           if (p2 == e2)
    718             c2 = 0;
    719           else
    720             c2 = *p2++;
    721 
    722           if (c1 != c2)
    723             /* Reverse sign here so upper case comes out last.  */
    724             return c2 - c1;
    725           if (!c1)
    726             break;
    727         }
    728 
    729       return 0;
    730     }
    731 }
    732 
    733 /* Sort INFILE, whose size is TOTAL,
    735    assuming that is small enough to be done in-core,
    736    then indexify it and send the output to OUTFILE (or to stdout).  */
    737 
    738 void
    739 sort_in_core (char *infile, int total, char *outfile)
    740 {
    741   char **nextline;
    742   char *data = (char *) xmalloc (total + 1);
    743   char *file_data;
    744   long file_size;
    745   int i;
    746   FILE *ostream = stdout;
    747   struct lineinfo *lineinfo;
    748 
    749   /* Read the contents of the file into the moby array `data'. */
    750 
    751   int desc = open (infile, O_RDONLY, 0);
    752 
    753   if (desc < 0)
    754     fatal (_("failure reopening %s"), infile);
    755   for (file_size = 0;;)
    756     {
    757       i = read (desc, data + file_size, total - file_size);
    758       if (i <= 0)
    759         break;
    760       file_size += i;
    761     }
    762   file_data = data;
    763   data[file_size] = 0;
    764 
    765   close (desc);
    766 
    767   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
    768     {
    769       error (_("%s: not a texinfo index file"), infile);
    770       return;
    771     }
    772 
    773   init_char_order ();
    774 
    775   /* Sort routines want to know this address. */
    776 
    777   text_base = data;
    778 
    779   /* Create the array of pointers to lines, with a default size
    780      frequently enough.  */
    781 
    782   nlines = total / 50;
    783   if (!nlines)
    784     nlines = 2;
    785   linearray = (char **) xmalloc (nlines * sizeof (char *));
    786 
    787   /* `nextline' points to the next free slot in this array.
    788      `nlines' is the allocated size.  */
    789 
    790   nextline = linearray;
    791 
    792   /* Parse the input file's data, and make entries for the lines.  */
    793 
    794   nextline = parsefile (infile, nextline, file_data, file_size);
    795   if (nextline == 0)
    796     {
    797       error (_("%s: not a texinfo index file"), infile);
    798       return;
    799     }
    800 
    801   /* Sort the lines. */
    802 
    803   /* If we have enough space, find the first keyfield of each line in advance.
    804      Make a `struct lineinfo' for each line, which records the keyfield
    805      as well as the line, and sort them.  */
    806 
    807   lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
    808 
    809   if (lineinfo)
    810     {
    811       size_t idx = 0;
    812       struct lineinfo *lp;
    813       char **p;
    814 
    815       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
    816         {
    817 	  lp->idx = idx++;
    818           lp->text = *p;
    819           lp->key.text = find_field (keyfields, *p, &lp->keylen);
    820           if (keyfields->numeric)
    821             lp->key.number = find_value (lp->key.text, lp->keylen);
    822         }
    823 
    824       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
    825              compare_prepared);
    826 
    827       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
    828         *p = lp->text;
    829 
    830       free (lineinfo);
    831     }
    832   else
    833     qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
    834 
    835   /* Open the output file. */
    836 
    837   if (outfile)
    838     {
    839       ostream = fopen (outfile, "w");
    840       if (!ostream)
    841         pfatal_with_name (outfile);
    842     }
    843 
    844   writelines (linearray, nextline - linearray, ostream);
    845   if (outfile)
    846     fclose (ostream);
    847 
    848   free (linearray);
    849   free (data);
    850 }
    851 
    852 /* Parse an input string in core into lines.
    854    DATA is the input string, and SIZE is its length.
    855    Data goes in LINEARRAY starting at NEXTLINE.
    856    The value returned is the first entry in LINEARRAY still unused.
    857    Value 0 means input file contents are invalid.  */
    858 
    859 char **
    860 parsefile (char *filename, char **nextline, char *data, long int size)
    861 {
    862   char *p, *end;
    863   char **line = nextline;
    864 
    865   p = data;
    866   end = p + size;
    867   *end = 0;
    868 
    869   while (p != end)
    870     {
    871       if (p[0] != '\\' && p[0] != '@')
    872         return 0;
    873 
    874       *line = p;
    875 
    876       /* Find the first letter of the first field of this line.  If it
    877          is different from the first letter of the first field of the
    878          first line, we need initial headers in the output index.  */
    879       while (*p && *p != '{')
    880         p++;
    881       if (p == end)
    882         return 0;
    883       p++;
    884       if (first_initial)
    885         {
    886           if (first_initial != toupper (*p))
    887             need_initials = 1;
    888         }
    889       else
    890         first_initial = toupper (*p);
    891 
    892       while (*p && *p != '\n')
    893         p++;
    894       if (p != end)
    895         p++;
    896 
    897       line++;
    898       if (line == linearray + nlines)
    899         {
    900           char **old = linearray;
    901           linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
    902           line += linearray - old;
    903         }
    904     }
    905 
    906   return line;
    907 }
    908 
    909 /* Indexification is a filter applied to the sorted lines
    911    as they are being written to the output file.
    912    Multiple entries for the same name, with different page numbers,
    913    get combined into a single entry with multiple page numbers.
    914    The first braced field, which is used for sorting, is discarded.
    915    However, its first character is examined, folded to lower case,
    916    and if it is different from that in the previous line fed to us
    917    a \initial line is written with one argument, the new initial.
    918 
    919    If an entry has four braced fields, then the second and third
    920    constitute primary and secondary names.
    921    In this case, each change of primary name
    922    generates a \primary line which contains only the primary name,
    923    and in between these are \secondary lines which contain
    924    just a secondary name and page numbers. */
    925 
    926 /* The last primary name we wrote a \primary entry for.
    927    If only one level of indexing is being done, this is the last name seen. */
    928 char *lastprimary;
    929 /* Length of storage allocated for lastprimary. */
    930 int lastprimarylength;
    931 
    932 /* Similar, for the secondary name. */
    933 char *lastsecondary;
    934 int lastsecondarylength;
    935 
    936 /* Zero if we are not in the middle of writing an entry.
    937    One if we have written the beginning of an entry but have not
    938    yet written any page numbers into it.
    939    Greater than one if we have written the beginning of an entry
    940    plus at least one page number. */
    941 int pending;
    942 
    943 /* The initial (for sorting purposes) of the last primary entry written.
    944    When this changes, a \initial {c} line is written */
    945 
    946 char *lastinitial;
    947 
    948 int lastinitiallength;
    949 
    950 /* When we need a string of length 1 for the value of lastinitial,
    951    store it here.  */
    952 
    953 char lastinitial1[2];
    954 
    955 /* Initialize static storage for writing an index. */
    956 
    957 void
    958 init_index (void)
    959 {
    960   pending = 0;
    961   lastinitial = lastinitial1;
    962   lastinitial1[0] = 0;
    963   lastinitial1[1] = 0;
    964   lastinitiallength = 0;
    965   lastprimarylength = 100;
    966   lastprimary = (char *) xmalloc (lastprimarylength + 1);
    967   memset (lastprimary, '\0', lastprimarylength + 1);
    968   lastsecondarylength = 100;
    969   lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
    970   memset (lastsecondary, '\0', lastsecondarylength + 1);
    971 }
    972 
    973 /* Indexify.  Merge entries for the same name,
    974    insert headers for each initial character, etc.  */
    975 
    976 void
    977 indexify (char *line, FILE *ostream)
    978 {
    979   char *primary, *secondary, *pagenumber;
    980   int primarylength, secondarylength = 0, pagelength;
    981   int nosecondary;
    982   int initiallength;
    983   char *initial;
    984   char initial1[2];
    985   register char *p;
    986 
    987   /* First, analyze the parts of the entry fed to us this time. */
    988 
    989   p = find_braced_pos (line, 0, 0, 0);
    990   if (*p == '{')
    991     {
    992       initial = p;
    993       /* Get length of inner pair of braces starting at `p',
    994          including that inner pair of braces.  */
    995       initiallength = find_braced_end (p + 1) + 1 - p;
    996     }
    997   else
    998     {
    999       initial = initial1;
   1000       initial1[0] = toupper (*p);
   1001       initial1[1] = 0;
   1002       initiallength = 1;
   1003     }
   1004 
   1005   pagenumber = find_braced_pos (line, 1, 0, 0);
   1006   pagelength = find_braced_end (pagenumber) - pagenumber;
   1007   if (pagelength == 0)
   1008     fatal (_("No page number in %s"), line);
   1009 
   1010   primary = find_braced_pos (line, 2, 0, 0);
   1011   primarylength = find_braced_end (primary) - primary;
   1012 
   1013   secondary = find_braced_pos (line, 3, 0, 0);
   1014   nosecondary = !*secondary;
   1015   if (!nosecondary)
   1016     secondarylength = find_braced_end (secondary) - secondary;
   1017 
   1018   /* If the primary is different from before, make a new primary entry. */
   1019   if (strncmp (primary, lastprimary, primarylength))
   1020     {
   1021       /* Close off current secondary entry first, if one is open. */
   1022       if (pending)
   1023         {
   1024           fputs ("}\n", ostream);
   1025           pending = 0;
   1026         }
   1027 
   1028       /* If this primary has a different initial, include an entry for
   1029          the initial. */
   1030       if (need_initials &&
   1031           (initiallength != lastinitiallength ||
   1032            strncmp (initial, lastinitial, initiallength)))
   1033         {
   1034           fprintf (ostream, "\\initial {");
   1035           fwrite (initial, 1, initiallength, ostream);
   1036           fputs ("}\n", ostream);
   1037           if (initial == initial1)
   1038             {
   1039               lastinitial = lastinitial1;
   1040               *lastinitial1 = *initial1;
   1041             }
   1042           else
   1043             {
   1044               lastinitial = initial;
   1045             }
   1046           lastinitiallength = initiallength;
   1047         }
   1048 
   1049       /* Make the entry for the primary.  */
   1050       if (nosecondary)
   1051         fputs ("\\entry {", ostream);
   1052       else
   1053         fputs ("\\primary {", ostream);
   1054       fwrite (primary, primarylength, 1, ostream);
   1055       if (nosecondary)
   1056         {
   1057           fputs ("}{", ostream);
   1058           pending = 1;
   1059         }
   1060       else
   1061         fputs ("}\n", ostream);
   1062 
   1063       /* Record name of most recent primary. */
   1064       if (lastprimarylength < primarylength)
   1065         {
   1066           lastprimarylength = primarylength + 100;
   1067           lastprimary = (char *) xrealloc (lastprimary,
   1068                                            1 + lastprimarylength);
   1069         }
   1070       strncpy (lastprimary, primary, primarylength);
   1071       lastprimary[primarylength] = 0;
   1072 
   1073       /* There is no current secondary within this primary, now. */
   1074       lastsecondary[0] = 0;
   1075     }
   1076 
   1077   /* Should not have an entry with no subtopic following one with a
   1078      subtopic. */
   1079 
   1080   if (nosecondary && *lastsecondary)
   1081     error (_("entry %s follows an entry with a secondary name"), line);
   1082 
   1083   /* Start a new secondary entry if necessary. */
   1084   if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
   1085     {
   1086       if (pending)
   1087         {
   1088           fputs ("}\n", ostream);
   1089           pending = 0;
   1090         }
   1091 
   1092       /* Write the entry for the secondary.  */
   1093       fputs ("\\secondary {", ostream);
   1094       fwrite (secondary, secondarylength, 1, ostream);
   1095       fputs ("}{", ostream);
   1096       pending = 1;
   1097 
   1098       /* Record name of most recent secondary. */
   1099       if (lastsecondarylength < secondarylength)
   1100         {
   1101           lastsecondarylength = secondarylength + 100;
   1102           lastsecondary = (char *) xrealloc (lastsecondary,
   1103                                              1 + lastsecondarylength);
   1104         }
   1105       strncpy (lastsecondary, secondary, secondarylength);
   1106       lastsecondary[secondarylength] = 0;
   1107     }
   1108 
   1109   /* Here to add one more page number to the current entry. */
   1110   if (pending++ != 1)
   1111     fputs (", ", ostream);  /* Punctuate first, if this is not the first. */
   1112   fwrite (pagenumber, pagelength, 1, ostream);
   1113 }
   1114 
   1115 /* Close out any unfinished output entry. */
   1116 
   1117 void
   1118 finish_index (FILE *ostream)
   1119 {
   1120   if (pending)
   1121     fputs ("}\n", ostream);
   1122   free (lastprimary);
   1123   free (lastsecondary);
   1124 }
   1125 
   1126 /* Copy the lines in the sorted order.
   1128    Each line is copied out of the input file it was found in. */
   1129 
   1130 void
   1131 writelines (char **linearray, int nlines, FILE *ostream)
   1132 {
   1133   char **stop_line = linearray + nlines;
   1134   char **next_line;
   1135 
   1136   init_index ();
   1137 
   1138   /* Output the text of the lines, and free the buffer space. */
   1139 
   1140   for (next_line = linearray; next_line != stop_line; next_line++)
   1141     {
   1142       /* Output the line only if distinct from previous one.  */
   1143       if (next_line == linearray
   1144       /* Compare previous line with this one, using only the
   1145          explicitly specd keyfields. */
   1146           || compare_general (*(next_line - 1), *next_line, 0L, 0L,
   1147                               num_keyfields - 1))
   1148         {
   1149           char *p = *next_line;
   1150           char c;
   1151 
   1152           while ((c = *p++) && c != '\n')
   1153             /* Do nothing. */ ;
   1154           *(p - 1) = 0;
   1155           indexify (*next_line, ostream);
   1156         }
   1157     }
   1158 
   1159   finish_index (ostream);
   1160 }
   1161 
   1162 /* Print error message and exit.  */
   1164 
   1165 void
   1166 fatal (const char *format, const char *arg)
   1167 {
   1168   error (format, arg);
   1169   xexit (1);
   1170 }
   1171 
   1172 /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
   1173 void
   1174 error (const char *format, const char *arg)
   1175 {
   1176   printf ("%s: ", program_name);
   1177   printf (format, arg);
   1178   if (format[strlen (format) -1] != '\n')
   1179     printf ("\n");
   1180 }
   1181 
   1182 void
   1183 perror_with_name (const char *name)
   1184 {
   1185   fprintf (stderr, "%s: ", program_name);
   1186   perror (name);
   1187 }
   1188 
   1189 void
   1190 pfatal_with_name (const char *name)
   1191 {
   1192   perror_with_name (name);
   1193   xexit (1);
   1194 }
   1195 
   1196 
   1197 /* Return a newly-allocated string concatenating S1, S2, and S3.  */
   1199 
   1200 static char *
   1201 concat3 (const char *s1, const char *s2, const char *s3)
   1202 {
   1203   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
   1204   char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
   1205 
   1206   strcpy (result, s1);
   1207   strcpy (result + len1, s2);
   1208   strcpy (result + len1 + len2, s3);
   1209   *(result + len1 + len2 + len3) = 0;
   1210 
   1211   return result;
   1212 }
   1213