Home | History | Annotate | Line # | Download | only in util
      1  1.6     oster /*	$NetBSD: texindex.c,v 1.6 2025/12/31 22:18:50 oster Exp $	*/
      2  1.1  christos 
      3  1.1  christos /* texindex -- sort TeX index dribble output into an actual index.
      4  1.1  christos    Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp
      5  1.1  christos 
      6  1.1  christos    Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
      7  1.1  christos    2002, 2003, 2004 Free Software Foundation, Inc.
      8  1.1  christos 
      9  1.1  christos    This program is free software; you can redistribute it and/or modify
     10  1.1  christos    it under the terms of the GNU General Public License as published by
     11  1.1  christos    the Free Software Foundation; either version 2, or (at your option)
     12  1.1  christos    any later version.
     13  1.1  christos 
     14  1.1  christos    This program is distributed in the hope that it will be useful,
     15  1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16  1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17  1.1  christos    GNU General Public License for more details.
     18  1.1  christos 
     19  1.1  christos    You should have received a copy of the GNU General Public License
     20  1.1  christos    along with this program; if not, write to the Free Software
     21  1.1  christos    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
     22  1.1  christos 
     23  1.1  christos #include "system.h"
     24  1.1  christos #include <getopt.h>
     25  1.1  christos 
     26  1.1  christos static char *program_name = "texindex";
     27  1.1  christos 
     28  1.1  christos #if defined (emacs)
     29  1.1  christos #  include "../src/config.h"
     30  1.1  christos /* Some s/os.h files redefine these. */
     31  1.1  christos #  undef read
     32  1.1  christos #  undef close
     33  1.1  christos #  undef write
     34  1.1  christos #  undef open
     35  1.1  christos #endif
     36  1.1  christos 
     37  1.1  christos #if !defined (HAVE_MEMSET)
     38  1.1  christos #undef memset
     39  1.1  christos #define memset(ptr, ignore, count) bzero (ptr, count)
     40  1.1  christos #endif
     41  1.1  christos 
     42  1.1  christos #if !defined (SEEK_SET)
     43  1.1  christos #  define SEEK_SET 0
     44  1.1  christos #  define SEEK_CUR 1
     45  1.1  christos #  define SEEK_END 2
     46  1.1  christos #endif /* !SEEK_SET */
     47  1.1  christos 
     48  1.1  christos /* When sorting in core, this structure describes one line
     49  1.1  christos    and the position and length of its first keyfield.  */
     50  1.1  christos struct lineinfo
     51  1.1  christos {
     52  1.1  christos   char *text;           /* The actual text of the line. */
     53  1.1  christos   union {
     54  1.1  christos     char *text;         /* The start of the key (for textual comparison). */
     55  1.1  christos     long number;        /* The numeric value (for numeric comparison). */
     56  1.1  christos   } key;
     57  1.1  christos   long keylen;          /* Length of KEY field. */
     58  1.3  christos   size_t idx;		/* tie breaker */
     59  1.1  christos };
     60  1.1  christos 
     61  1.1  christos /* This structure describes a field to use as a sort key. */
     62  1.1  christos struct keyfield
     63  1.1  christos {
     64  1.1  christos   int startwords;       /* Number of words to skip. */
     65  1.1  christos   int startchars;       /* Number of additional chars to skip. */
     66  1.1  christos   int endwords;         /* Number of words to ignore at end. */
     67  1.1  christos   int endchars;         /* Ditto for characters of last word. */
     68  1.1  christos   char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */
     69  1.1  christos   char fold_case;       /* Non-zero means case doesn't matter. */
     70  1.1  christos   char reverse;         /* Non-zero means compare in reverse order. */
     71  1.1  christos   char numeric;         /* Non-zeros means field is ASCII numeric. */
     72  1.1  christos   char positional;      /* Sort according to file position. */
     73  1.1  christos   char braced;          /* Count balanced-braced groupings as fields. */
     74  1.1  christos };
     75  1.1  christos 
     76  1.1  christos /* Vector of keyfields to use. */
     77  1.1  christos struct keyfield keyfields[3];
     78  1.1  christos 
     79  1.1  christos /* Number of keyfields stored in that vector.  */
     80  1.1  christos int num_keyfields = 3;
     81  1.1  christos 
     82  1.1  christos /* Vector of input file names, terminated with a null pointer. */
     83  1.1  christos char **infiles;
     84  1.1  christos 
     85  1.1  christos /* Vector of corresponding output file names, or NULL, meaning default it
     86  1.1  christos    (add an `s' to the end). */
     87  1.1  christos char **outfiles;
     88  1.1  christos 
     89  1.1  christos /* Length of `infiles'. */
     90  1.1  christos int num_infiles;
     91  1.1  christos 
     92  1.1  christos /* Pointer to the array of pointers to lines being sorted. */
     93  1.1  christos char **linearray;
     94  1.1  christos 
     95  1.1  christos /* The allocated length of `linearray'. */
     96  1.1  christos long nlines;
     97  1.1  christos 
     98  1.1  christos /* During in-core sort, this points to the base of the data block
     99  1.1  christos    which contains all the lines of data.  */
    100  1.1  christos char *text_base;
    101  1.1  christos 
    102  1.1  christos /* Initially 0; changed to 1 if we want initials in this index.  */
    103  1.1  christos int need_initials;
    104  1.1  christos 
    105  1.1  christos /* Remembers the first initial letter seen in this index, so we can
    106  1.1  christos    determine whether we need initials in the sorted form.  */
    107  1.1  christos char first_initial;
    108  1.1  christos 
    109  1.1  christos /* Forward declarations of functions in this file. */
    110  1.1  christos void decode_command (int argc, char **argv);
    111  1.1  christos void sort_in_core (char *infile, int total, char *outfile);
    112  1.1  christos char **parsefile (char *filename, char **nextline, char *data, long int size);
    113  1.1  christos char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
    114  1.1  christos char *find_pos (char *str, int words, int chars, int ignore_blanks);
    115  1.1  christos long find_value (char *start, long int length);
    116  1.1  christos char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
    117  1.1  christos char *find_braced_end (char *str);
    118  1.1  christos void writelines (char **linearray, int nlines, FILE *ostream);
    119  1.1  christos int compare_field (struct keyfield *keyfield, char *start1,
    120  1.1  christos                    long int length1, long int pos1, char *start2,
    121  1.1  christos                    long int length2, long int pos2);
    122  1.1  christos int compare_full (const void *, const void *);
    123  1.1  christos void pfatal_with_name (const char *name);
    124  1.1  christos void fatal (const char *format, const char *arg);
    125  1.1  christos void error (const char *format, const char *arg);
    126  1.6     oster void *xmalloc (size_t), *xrealloc (void *, size_t);
    127  1.2  christos static char *concat3 (const char *, const char *, const char *);
    128  1.1  christos 
    129  1.1  christos int
    131  1.1  christos main (int argc, char **argv)
    132  1.1  christos {
    133  1.1  christos   int i;
    134  1.1  christos 
    135  1.1  christos #ifdef HAVE_SETLOCALE
    136  1.1  christos   /* Set locale via LC_ALL.  */
    137  1.1  christos   setlocale (LC_ALL, "");
    138  1.1  christos #endif
    139  1.1  christos 
    140  1.1  christos   /* Set the text message domain.  */
    141  1.1  christos   bindtextdomain (PACKAGE, LOCALEDIR);
    142  1.1  christos   textdomain (PACKAGE);
    143  1.1  christos 
    144  1.1  christos   /* In case we write to a redirected stdout that fails.  */
    145  1.1  christos   /* not ready atexit (close_stdout); */
    146  1.1  christos 
    147  1.1  christos   /* Describe the kind of sorting to do. */
    148  1.1  christos   /* The first keyfield uses the first braced field and folds case. */
    149  1.1  christos   keyfields[0].braced = 1;
    150  1.1  christos   keyfields[0].fold_case = 1;
    151  1.1  christos   keyfields[0].endwords = -1;
    152  1.1  christos   keyfields[0].endchars = -1;
    153  1.1  christos 
    154  1.1  christos   /* The second keyfield uses the second braced field, numerically. */
    155  1.1  christos   keyfields[1].braced = 1;
    156  1.1  christos   keyfields[1].numeric = 1;
    157  1.1  christos   keyfields[1].startwords = 1;
    158  1.1  christos   keyfields[1].endwords = -1;
    159  1.1  christos   keyfields[1].endchars = -1;
    160  1.1  christos 
    161  1.1  christos   /* The third keyfield (which is ignored while discarding duplicates)
    162  1.1  christos      compares the whole line. */
    163  1.1  christos   keyfields[2].endwords = -1;
    164  1.1  christos   keyfields[2].endchars = -1;
    165  1.1  christos 
    166  1.1  christos   decode_command (argc, argv);
    167  1.1  christos 
    168  1.1  christos   /* Process input files completely, one by one.  */
    169  1.1  christos 
    170  1.1  christos   for (i = 0; i < num_infiles; i++)
    171  1.1  christos     {
    172  1.1  christos       int desc;
    173  1.1  christos       off_t ptr;
    174  1.1  christos       char *outfile;
    175  1.1  christos       struct stat instat;
    176  1.1  christos 
    177  1.1  christos       desc = open (infiles[i], O_RDONLY, 0);
    178  1.1  christos       if (desc < 0)
    179  1.1  christos         pfatal_with_name (infiles[i]);
    180  1.1  christos 
    181  1.1  christos       if (stat (infiles[i], &instat))
    182  1.1  christos         pfatal_with_name (infiles[i]);
    183  1.1  christos       if (S_ISDIR (instat.st_mode))
    184  1.1  christos         {
    185  1.1  christos #ifdef EISDIR
    186  1.1  christos           errno = EISDIR;
    187  1.1  christos #endif
    188  1.1  christos           pfatal_with_name (infiles[i]);
    189  1.1  christos         }
    190  1.1  christos 
    191  1.1  christos       lseek (desc, (off_t) 0, SEEK_END);
    192  1.1  christos       ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
    193  1.1  christos 
    194  1.1  christos       close (desc);
    195  1.1  christos 
    196  1.1  christos       outfile = outfiles[i];
    197  1.2  christos       if (!outfile)
    198  1.1  christos         outfile = concat3 (infiles[i], "s", "");
    199  1.1  christos 
    200  1.1  christos       need_initials = 0;
    201  1.1  christos       first_initial = '\0';
    202  1.2  christos 
    203  1.2  christos       if (ptr != (int)ptr)
    204  1.2  christos 	{
    205  1.2  christos 	  fprintf (stderr, "%s: %s: file too large\n", program_name,
    206  1.2  christos 		   infiles[i]);
    207  1.2  christos 	  xexit (1);
    208  1.2  christos 	}
    209  1.1  christos       sort_in_core (infiles[i], (int)ptr, outfile);
    210  1.1  christos     }
    211  1.1  christos 
    212  1.1  christos   xexit (0);
    213  1.1  christos   return 0; /* Avoid bogus warnings.  */
    214  1.1  christos }
    215  1.1  christos 
    216  1.1  christos typedef struct
    218  1.1  christos {
    219  1.1  christos   char *long_name;
    220  1.1  christos   char *short_name;
    221  1.1  christos   int *variable_ref;
    222  1.1  christos   int variable_value;
    223  1.1  christos   char *arg_name;
    224  1.1  christos   char *doc_string;
    225  1.1  christos } TEXINDEX_OPTION;
    226  1.1  christos 
    227  1.1  christos TEXINDEX_OPTION texindex_options[] = {
    228  1.1  christos   { "--help", "-h", (int *)NULL, 0, (char *)NULL,
    229  1.1  christos       N_("display this help and exit") },
    230  1.1  christos   { "--output", "-o", (int *)NULL, 0, "FILE",
    231  1.1  christos       N_("send output to FILE") },
    232  1.1  christos   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
    233  1.1  christos       N_("display version information and exit") },
    234  1.1  christos   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
    235  1.1  christos };
    236  1.1  christos 
    237  1.1  christos void
    238  1.1  christos usage (int result_value)
    239  1.1  christos {
    240  1.1  christos   register int i;
    241  1.1  christos   FILE *f = result_value ? stderr : stdout;
    242  1.1  christos 
    243  1.1  christos   fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
    244  1.1  christos   fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
    245  1.1  christos   /* Avoid trigraph nonsense.  */
    246  1.1  christos   fprintf (f,
    247  1.1  christos _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
    248  1.1  christos            '?', '?'); /* avoid trigraph in cat-id-tbl.c */
    249  1.1  christos   fprintf (f, _("\nOptions:\n"));
    250  1.1  christos 
    251  1.1  christos   for (i = 0; texindex_options[i].long_name; i++)
    252  1.1  christos     {
    253  1.1  christos       putc (' ', f);
    254  1.1  christos 
    255  1.1  christos       if (texindex_options[i].short_name)
    256  1.1  christos         fprintf (f, "%s, ", texindex_options[i].short_name);
    257  1.1  christos 
    258  1.1  christos       fprintf (f, "%s %s",
    259  1.1  christos                texindex_options[i].long_name,
    260  1.1  christos                texindex_options[i].arg_name
    261  1.1  christos                ? texindex_options[i].arg_name : "");
    262  1.1  christos 
    263  1.1  christos       fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
    264  1.1  christos     }
    265  1.1  christos   fputs (_("\n\
    266  1.1  christos Email bug reports to bug-texinfo (at) gnu.org,\n\
    267  1.1  christos general questions and discussion to help-texinfo (at) gnu.org.\n\
    268  1.1  christos Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
    269  1.1  christos   fputs ("\n", f);
    270  1.1  christos 
    271  1.1  christos   xexit (result_value);
    272  1.1  christos }
    273  1.1  christos 
    274  1.1  christos /* Decode the command line arguments to set the parameter variables
    275  1.1  christos    and set up the vector of keyfields and the vector of input files. */
    276  1.1  christos 
    277  1.1  christos void
    278  1.1  christos decode_command (int argc, char **argv)
    279  1.1  christos {
    280  1.1  christos   int arg_index = 1;
    281  1.1  christos   char **ip;
    282  1.1  christos   char **op;
    283  1.1  christos 
    284  1.1  christos   /* Allocate ARGC input files, which must be enough.  */
    285  1.1  christos 
    286  1.1  christos   infiles = (char **) xmalloc (argc * sizeof (char *));
    287  1.1  christos   outfiles = (char **) xmalloc (argc * sizeof (char *));
    288  1.1  christos   ip = infiles;
    289  1.1  christos   op = outfiles;
    290  1.1  christos 
    291  1.1  christos   while (arg_index < argc)
    292  1.1  christos     {
    293  1.1  christos       char *arg = argv[arg_index++];
    294  1.1  christos 
    295  1.1  christos       if (*arg == '-')
    296  1.1  christos         {
    297  1.1  christos           if (strcmp (arg, "--version") == 0)
    298  1.1  christos             {
    299  1.1  christos               printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
    300  1.1  christos               puts ("");
    301  1.1  christos               puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
    302  1.1  christos               printf (_("There is NO warranty.  You may redistribute this software\n\
    303  1.1  christos under the terms of the GNU General Public License.\n\
    304  1.1  christos For more information about these matters, see the files named COPYING.\n"));
    305  1.1  christos               xexit (0);
    306  1.1  christos             }
    307  1.1  christos           else if ((strcmp (arg, "--keep") == 0) ||
    308  1.2  christos                    (strcmp (arg, "-k") == 0))
    309  1.1  christos             {
    310  1.1  christos 	      /* Ignore, for backward compatibility */
    311  1.1  christos             }
    312  1.1  christos           else if ((strcmp (arg, "--help") == 0) ||
    313  1.1  christos                    (strcmp (arg, "-h") == 0))
    314  1.1  christos             {
    315  1.1  christos               usage (0);
    316  1.1  christos             }
    317  1.1  christos           else if ((strcmp (arg, "--output") == 0) ||
    318  1.1  christos                    (strcmp (arg, "-o") == 0))
    319  1.1  christos             {
    320  1.1  christos               if (argv[arg_index] != (char *)NULL)
    321  1.1  christos                 {
    322  1.1  christos                   arg_index++;
    323  1.1  christos                   if (op > outfiles)
    324  1.1  christos                     *(op - 1) = argv[arg_index];
    325  1.1  christos                 }
    326  1.1  christos               else
    327  1.1  christos                 usage (1);
    328  1.1  christos             }
    329  1.1  christos           else
    330  1.1  christos             usage (1);
    331  1.1  christos         }
    332  1.1  christos       else
    333  1.1  christos         {
    334  1.1  christos           *ip++ = arg;
    335  1.1  christos           *op++ = (char *)NULL;
    336  1.1  christos         }
    337  1.1  christos     }
    338  1.1  christos 
    339  1.1  christos   /* Record number of keyfields and terminate list of filenames. */
    340  1.1  christos   num_infiles = ip - infiles;
    341  1.1  christos   *ip = (char *)NULL;
    342  1.1  christos   if (num_infiles == 0)
    343  1.1  christos     usage (1);
    344  1.1  christos }
    345  1.1  christos 
    346  1.1  christos /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
    348  1.1  christos 
    349  1.1  christos int
    350  1.1  christos compare_full (const void *p1, const void *p2)
    351  1.1  christos {
    352  1.1  christos   char **line1 = (char **) p1;
    353  1.1  christos   char **line2 = (char **) p2;
    354  1.1  christos   int i;
    355  1.1  christos 
    356  1.1  christos   /* Compare using the first keyfield;
    357  1.1  christos      if that does not distinguish the lines, try the second keyfield;
    358  1.1  christos      and so on. */
    359  1.1  christos 
    360  1.1  christos   for (i = 0; i < num_keyfields; i++)
    361  1.1  christos     {
    362  1.1  christos       long length1, length2;
    363  1.1  christos       char *start1 = find_field (&keyfields[i], *line1, &length1);
    364  1.1  christos       char *start2 = find_field (&keyfields[i], *line2, &length2);
    365  1.1  christos       int tem = compare_field (&keyfields[i], start1, length1,
    366  1.1  christos                                *line1 - text_base,
    367  1.1  christos                                start2, length2, *line2 - text_base);
    368  1.1  christos       if (tem)
    369  1.1  christos         {
    370  1.1  christos           if (keyfields[i].reverse)
    371  1.1  christos             return -tem;
    372  1.1  christos           return tem;
    373  1.3  christos         }
    374  1.3  christos     }
    375  1.3  christos 
    376  1.1  christos   if (*line1 == *line2)
    377  1.1  christos     abort ();
    378  1.1  christos   return *line1 < *line2 ? -1 : 1;
    379  1.1  christos }
    380  1.1  christos 
    381  1.1  christos /* Compare LINE1 and LINE2, described by structures
    382  1.1  christos    in which the first keyfield is identified in advance.
    383  1.1  christos    For positional sorting, assumes that the order of the lines in core
    384  1.1  christos    reflects their nominal order.  */
    385  1.1  christos int
    386  1.1  christos compare_prepared (const void *p1, const void *p2)
    387  1.1  christos {
    388  1.1  christos   struct lineinfo *line1 = (struct lineinfo *) p1;
    389  1.1  christos   struct lineinfo *line2 = (struct lineinfo *) p2;
    390  1.1  christos   int i;
    391  1.1  christos   int tem;
    392  1.1  christos   char *text1, *text2;
    393  1.1  christos 
    394  1.1  christos   /* Compare using the first keyfield, which has been found for us already. */
    395  1.1  christos   if (keyfields->positional)
    396  1.1  christos     {
    397  1.1  christos       if (line1->text - text_base > line2->text - text_base)
    398  1.1  christos         tem = 1;
    399  1.1  christos       else
    400  1.1  christos         tem = -1;
    401  1.1  christos     }
    402  1.1  christos   else if (keyfields->numeric)
    403  1.1  christos     tem = line1->key.number - line2->key.number;
    404  1.1  christos   else
    405  1.1  christos     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
    406  1.1  christos                          line2->key.text, line2->keylen, 0);
    407  1.1  christos   if (tem)
    408  1.1  christos     {
    409  1.1  christos       if (keyfields->reverse)
    410  1.1  christos         return -tem;
    411  1.1  christos       return tem;
    412  1.1  christos     }
    413  1.1  christos 
    414  1.1  christos   text1 = line1->text;
    415  1.1  christos   text2 = line2->text;
    416  1.1  christos 
    417  1.1  christos   /* Compare using the second keyfield;
    418  1.1  christos      if that does not distinguish the lines, try the third keyfield;
    419  1.1  christos      and so on. */
    420  1.1  christos 
    421  1.1  christos   for (i = 1; i < num_keyfields; i++)
    422  1.1  christos     {
    423  1.1  christos       long length1, length2;
    424  1.1  christos       char *start1 = find_field (&keyfields[i], text1, &length1);
    425  1.1  christos       char *start2 = find_field (&keyfields[i], text2, &length2);
    426  1.1  christos       int tem = compare_field (&keyfields[i], start1, length1,
    427  1.1  christos                                text1 - text_base,
    428  1.1  christos                                start2, length2, text2 - text_base);
    429  1.1  christos       if (tem)
    430  1.1  christos         {
    431  1.1  christos           if (keyfields[i].reverse)
    432  1.1  christos             return -tem;
    433  1.1  christos           return tem;
    434  1.3  christos         }
    435  1.3  christos     }
    436  1.3  christos 
    437  1.1  christos   if (line1->idx == line2->idx)
    438  1.1  christos     abort ();
    439  1.1  christos   return line1->idx < line2->idx ? -1 : 1;
    440  1.1  christos }
    441  1.1  christos 
    442  1.1  christos /* Like compare_full but more general.
    443  1.1  christos    You can pass any strings, and you can say how many keyfields to use.
    444  1.1  christos    POS1 and POS2 should indicate the nominal positional ordering of
    445  1.1  christos    the two lines in the input.  */
    446  1.1  christos 
    447  1.1  christos int
    448  1.1  christos compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
    449  1.1  christos {
    450  1.1  christos   int i;
    451  1.1  christos 
    452  1.1  christos   /* Compare using the first keyfield;
    453  1.1  christos      if that does not distinguish the lines, try the second keyfield;
    454  1.1  christos      and so on. */
    455  1.1  christos 
    456  1.1  christos   for (i = 0; i < use_keyfields; i++)
    457  1.1  christos     {
    458  1.1  christos       long length1, length2;
    459  1.1  christos       char *start1 = find_field (&keyfields[i], str1, &length1);
    460  1.1  christos       char *start2 = find_field (&keyfields[i], str2, &length2);
    461  1.1  christos       int tem = compare_field (&keyfields[i], start1, length1, pos1,
    462  1.1  christos                                start2, length2, pos2);
    463  1.1  christos       if (tem)
    464  1.1  christos         {
    465  1.1  christos           if (keyfields[i].reverse)
    466  1.1  christos             return -tem;
    467  1.1  christos           return tem;
    468  1.1  christos         }
    469  1.1  christos     }
    470  1.1  christos 
    471  1.1  christos   return 0;                     /* Lines match exactly. */
    472  1.1  christos }
    473  1.1  christos 
    474  1.1  christos /* Find the start and length of a field in STR according to KEYFIELD.
    475  1.1  christos    A pointer to the starting character is returned, and the length
    476  1.1  christos    is stored into the int that LENGTHPTR points to.  */
    477  1.1  christos 
    478  1.1  christos char *
    479  1.1  christos find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
    480  1.6     oster {
    481  1.1  christos   char *start;
    482  1.1  christos   char *end;
    483  1.1  christos   char *(*fun) (char*, int, int, int);
    484  1.1  christos 
    485  1.1  christos   if (keyfield->braced)
    486  1.1  christos     fun = find_braced_pos;
    487  1.1  christos   else
    488  1.1  christos     fun = find_pos;
    489  1.1  christos 
    490  1.1  christos   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
    491  1.1  christos                   keyfield->ignore_blanks);
    492  1.1  christos   if (keyfield->endwords < 0)
    493  1.1  christos     {
    494  1.1  christos       if (keyfield->braced)
    495  1.1  christos         end = find_braced_end (start);
    496  1.1  christos       else
    497  1.1  christos         {
    498  1.1  christos           end = start;
    499  1.1  christos           while (*end && *end != '\n')
    500  1.1  christos             end++;
    501  1.1  christos         }
    502  1.1  christos     }
    503  1.1  christos   else
    504  1.1  christos     {
    505  1.1  christos       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
    506  1.1  christos       if (end - str < start - str)
    507  1.1  christos         end = start;
    508  1.1  christos     }
    509  1.1  christos   *lengthptr = end - start;
    510  1.1  christos   return start;
    511  1.1  christos }
    512  1.1  christos 
    513  1.1  christos /* Return a pointer to a specified place within STR,
    514  1.1  christos    skipping (from the beginning) WORDS words and then CHARS chars.
    515  1.1  christos    If IGNORE_BLANKS is nonzero, we skip all blanks
    516  1.1  christos    after finding the specified word.  */
    517  1.1  christos 
    518  1.1  christos char *
    519  1.1  christos find_pos (char *str, int words, int chars, int ignore_blanks)
    520  1.1  christos {
    521  1.1  christos   int i;
    522  1.1  christos   char *p = str;
    523  1.1  christos 
    524  1.1  christos   for (i = 0; i < words; i++)
    525  1.1  christos     {
    526  1.1  christos       char c;
    527  1.1  christos       /* Find next bunch of nonblanks and skip them. */
    528  1.1  christos       while ((c = *p) == ' ' || c == '\t')
    529  1.1  christos         p++;
    530  1.1  christos       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
    531  1.1  christos         p++;
    532  1.1  christos       if (!*p || *p == '\n')
    533  1.1  christos         return p;
    534  1.1  christos     }
    535  1.1  christos 
    536  1.1  christos   while (*p == ' ' || *p == '\t')
    537  1.1  christos     p++;
    538  1.1  christos 
    539  1.1  christos   for (i = 0; i < chars; i++)
    540  1.1  christos     {
    541  1.1  christos       if (!*p || *p == '\n')
    542  1.1  christos         break;
    543  1.1  christos       p++;
    544  1.1  christos     }
    545  1.1  christos   return p;
    546  1.1  christos }
    547  1.1  christos 
    548  1.1  christos /* Like find_pos but assumes that each field is surrounded by braces
    549  1.1  christos    and that braces within fields are balanced. */
    550  1.1  christos 
    551  1.1  christos char *
    552  1.1  christos find_braced_pos (char *str, int words, int chars, int ignore_blanks)
    553  1.1  christos {
    554  1.1  christos   int i;
    555  1.1  christos   int bracelevel;
    556  1.1  christos   char *p = str;
    557  1.1  christos   char c;
    558  1.1  christos 
    559  1.1  christos   for (i = 0; i < words; i++)
    560  1.1  christos     {
    561  1.1  christos       bracelevel = 1;
    562  1.1  christos       while ((c = *p++) != '{' && c != '\n' && c)
    563  1.1  christos         /* Do nothing. */ ;
    564  1.1  christos       if (c != '{')
    565  1.1  christos         return p - 1;
    566  1.1  christos       while (bracelevel)
    567  1.1  christos         {
    568  1.1  christos           c = *p++;
    569  1.1  christos           if (c == '{')
    570  1.1  christos             bracelevel++;
    571  1.1  christos           if (c == '}')
    572  1.1  christos             bracelevel--;
    573  1.1  christos           if (c == 0 || c == '\n')
    574  1.1  christos             return p - 1;
    575  1.1  christos         }
    576  1.1  christos     }
    577  1.1  christos 
    578  1.1  christos   while ((c = *p++) != '{' && c != '\n' && c)
    579  1.1  christos     /* Do nothing. */ ;
    580  1.1  christos 
    581  1.1  christos   if (c != '{')
    582  1.1  christos     return p - 1;
    583  1.1  christos 
    584  1.1  christos   if (ignore_blanks)
    585  1.1  christos     while ((c = *p) == ' ' || c == '\t')
    586  1.1  christos       p++;
    587  1.1  christos 
    588  1.1  christos   for (i = 0; i < chars; i++)
    589  1.1  christos     {
    590  1.1  christos       if (!*p || *p == '\n')
    591  1.1  christos         break;
    592  1.1  christos       p++;
    593  1.1  christos     }
    594  1.1  christos   return p;
    595  1.1  christos }
    596  1.1  christos 
    597  1.1  christos /* Find the end of the balanced-brace field which starts at STR.
    598  1.1  christos    The position returned is just before the closing brace. */
    599  1.1  christos 
    600  1.1  christos char *
    601  1.1  christos find_braced_end (char *str)
    602  1.1  christos {
    603  1.1  christos   int bracelevel;
    604  1.1  christos   char *p = str;
    605  1.1  christos   char c;
    606  1.1  christos 
    607  1.1  christos   bracelevel = 1;
    608  1.1  christos   while (bracelevel)
    609  1.1  christos     {
    610  1.1  christos       c = *p++;
    611  1.1  christos       if (c == '{')
    612  1.1  christos         bracelevel++;
    613  1.1  christos       if (c == '}')
    614  1.1  christos         bracelevel--;
    615  1.1  christos       if (c == 0 || c == '\n')
    616  1.1  christos         return p - 1;
    617  1.1  christos     }
    618  1.1  christos   return p - 1;
    619  1.1  christos }
    620  1.1  christos 
    621  1.1  christos long
    622  1.1  christos find_value (char *start, long int length)
    623  1.1  christos {
    624  1.1  christos   while (length != 0L)
    625  1.1  christos     {
    626  1.1  christos       if (isdigit (*start))
    627  1.1  christos         return atol (start);
    628  1.1  christos       length--;
    629  1.1  christos       start++;
    630  1.1  christos     }
    631  1.1  christos   return 0l;
    632  1.1  christos }
    633  1.1  christos 
/* Translation table applied to characters before they are compared.
   Digits and letters are given values above every other character so
   that alphanumerics sort after all else, and the two cases of a
   letter share one value so that the first sorting pass ignores
   case.  */
int char_order[256];

/* Fill in char_order.  Entry 0 is never written here.  */
void
init_char_order (void)
{
  int c;
  int lower;

  /* Start from the identity mapping.  */
  c = 1;
  while (c < 256)
    {
      char_order[c] = c;
      c++;
    }

  /* Move the digits above the non-alphanumerics.  */
  for (c = '0'; c <= '9'; c++)
    char_order[c] = c + 512;

  /* Likewise the letters; an upper case letter gets the value of its
     lower case counterpart.  */
  for (lower = 'a'; lower <= 'z'; lower++)
    {
      int upper = lower + 'A' - 'a';
      int rank = 512 + lower;

      char_order[lower] = rank;
      char_order[upper] = rank;
    }
}
    655  1.1  christos 
    656  1.1  christos /* Compare two fields (each specified as a start pointer and a character count)
    657  1.1  christos    according to KEYFIELD.
    658  1.1  christos    The sign of the value reports the relation between the fields. */
    659  1.1  christos 
    660  1.1  christos int
    661  1.1  christos compare_field (struct keyfield *keyfield, char *start1, long int length1,
    662  1.1  christos                long int pos1, char *start2, long int length2, long int pos2)
    663  1.1  christos {
    664  1.1  christos   if (keyfields->positional)
    665  1.1  christos     {
    666  1.1  christos       if (pos1 > pos2)
    667  1.1  christos         return 1;
    668  1.1  christos       else
    669  1.1  christos         return -1;
    670  1.1  christos     }
    671  1.1  christos   if (keyfield->numeric)
    672  1.1  christos     {
    673  1.1  christos       long value = find_value (start1, length1) - find_value (start2, length2);
    674  1.1  christos       if (value > 0)
    675  1.1  christos         return 1;
    676  1.1  christos       if (value < 0)
    677  1.1  christos         return -1;
    678  1.1  christos       return 0;
    679  1.1  christos     }
    680  1.1  christos   else
    681  1.1  christos     {
    682  1.1  christos       char *p1 = start1;
    683  1.1  christos       char *p2 = start2;
    684  1.1  christos       char *e1 = start1 + length1;
    685  1.1  christos       char *e2 = start2 + length2;
    686  1.1  christos 
    687  1.1  christos       while (1)
    688  1.1  christos         {
    689  1.1  christos           int c1, c2;
    690  1.1  christos 
    691  1.1  christos           if (p1 == e1)
    692  1.1  christos             c1 = 0;
    693  1.1  christos           else
    694  1.1  christos             c1 = *p1++;
    695  1.1  christos           if (p2 == e2)
    696  1.1  christos             c2 = 0;
    697  1.1  christos           else
    698  1.1  christos             c2 = *p2++;
    699  1.1  christos 
    700  1.1  christos           if (char_order[c1] != char_order[c2])
    701  1.1  christos             return char_order[c1] - char_order[c2];
    702  1.1  christos           if (!c1)
    703  1.1  christos             break;
    704  1.1  christos         }
    705  1.1  christos 
    706  1.1  christos       /* Strings are equal except possibly for case.  */
    707  1.1  christos       p1 = start1;
    708  1.1  christos       p2 = start2;
    709  1.1  christos       while (1)
    710  1.1  christos         {
    711  1.1  christos           int c1, c2;
    712  1.1  christos 
    713  1.1  christos           if (p1 == e1)
    714  1.1  christos             c1 = 0;
    715  1.1  christos           else
    716  1.1  christos             c1 = *p1++;
    717  1.1  christos           if (p2 == e2)
    718  1.1  christos             c2 = 0;
    719  1.1  christos           else
    720  1.1  christos             c2 = *p2++;
    721  1.1  christos 
    722  1.1  christos           if (c1 != c2)
    723  1.1  christos             /* Reverse sign here so upper case comes out last.  */
    724  1.1  christos             return c2 - c1;
    725  1.1  christos           if (!c1)
    726  1.1  christos             break;
    727  1.1  christos         }
    728  1.1  christos 
    729  1.1  christos       return 0;
    730  1.1  christos     }
    731  1.1  christos }
    732  1.1  christos 
    733  1.1  christos /* Sort INFILE, whose size is TOTAL,
    735  1.1  christos    assuming that is small enough to be done in-core,
    736  1.1  christos    then indexify it and send the output to OUTFILE (or to stdout).  */
    737  1.1  christos 
    738  1.1  christos void
    739  1.1  christos sort_in_core (char *infile, int total, char *outfile)
    740  1.1  christos {
    741  1.1  christos   char **nextline;
    742  1.1  christos   char *data = (char *) xmalloc (total + 1);
    743  1.1  christos   char *file_data;
    744  1.1  christos   long file_size;
    745  1.1  christos   int i;
    746  1.1  christos   FILE *ostream = stdout;
    747  1.1  christos   struct lineinfo *lineinfo;
    748  1.1  christos 
    749  1.1  christos   /* Read the contents of the file into the moby array `data'. */
    750  1.1  christos 
    751  1.1  christos   int desc = open (infile, O_RDONLY, 0);
    752  1.1  christos 
    753  1.1  christos   if (desc < 0)
    754  1.1  christos     fatal (_("failure reopening %s"), infile);
    755  1.1  christos   for (file_size = 0;;)
    756  1.1  christos     {
    757  1.1  christos       i = read (desc, data + file_size, total - file_size);
    758  1.1  christos       if (i <= 0)
    759  1.1  christos         break;
    760  1.1  christos       file_size += i;
    761  1.1  christos     }
    762  1.1  christos   file_data = data;
    763  1.1  christos   data[file_size] = 0;
    764  1.1  christos 
    765  1.1  christos   close (desc);
    766  1.1  christos 
    767  1.1  christos   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
    768  1.1  christos     {
    769  1.1  christos       error (_("%s: not a texinfo index file"), infile);
    770  1.1  christos       return;
    771  1.1  christos     }
    772  1.1  christos 
    773  1.1  christos   init_char_order ();
    774  1.1  christos 
    775  1.1  christos   /* Sort routines want to know this address. */
    776  1.1  christos 
    777  1.1  christos   text_base = data;
    778  1.1  christos 
    779  1.1  christos   /* Create the array of pointers to lines, with a default size
    780  1.1  christos      frequently enough.  */
    781  1.1  christos 
    782  1.1  christos   nlines = total / 50;
    783  1.1  christos   if (!nlines)
    784  1.1  christos     nlines = 2;
    785  1.1  christos   linearray = (char **) xmalloc (nlines * sizeof (char *));
    786  1.1  christos 
    787  1.1  christos   /* `nextline' points to the next free slot in this array.
    788  1.1  christos      `nlines' is the allocated size.  */
    789  1.1  christos 
    790  1.1  christos   nextline = linearray;
    791  1.1  christos 
    792  1.1  christos   /* Parse the input file's data, and make entries for the lines.  */
    793  1.1  christos 
    794  1.1  christos   nextline = parsefile (infile, nextline, file_data, file_size);
    795  1.1  christos   if (nextline == 0)
    796  1.1  christos     {
    797  1.1  christos       error (_("%s: not a texinfo index file"), infile);
    798  1.1  christos       return;
    799  1.1  christos     }
    800  1.1  christos 
    801  1.1  christos   /* Sort the lines. */
    802  1.1  christos 
    803  1.1  christos   /* If we have enough space, find the first keyfield of each line in advance.
    804  1.1  christos      Make a `struct lineinfo' for each line, which records the keyfield
    805  1.1  christos      as well as the line, and sort them.  */
    806  1.1  christos 
    807  1.3  christos   lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
    808  1.1  christos 
    809  1.1  christos   if (lineinfo)
    810  1.1  christos     {
    811  1.1  christos       size_t idx = 0;
    812  1.1  christos       struct lineinfo *lp;
    813  1.3  christos       char **p;
    814  1.1  christos 
    815  1.1  christos       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
    816  1.1  christos         {
    817  1.1  christos 	  lp->idx = idx++;
    818  1.1  christos           lp->text = *p;
    819  1.1  christos           lp->key.text = find_field (keyfields, *p, &lp->keylen);
    820  1.1  christos           if (keyfields->numeric)
    821  1.1  christos             lp->key.number = find_value (lp->key.text, lp->keylen);
    822  1.1  christos         }
    823  1.1  christos 
    824  1.1  christos       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
    825  1.1  christos              compare_prepared);
    826  1.1  christos 
    827  1.1  christos       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
    828  1.1  christos         *p = lp->text;
    829  1.1  christos 
    830  1.1  christos       free (lineinfo);
    831  1.1  christos     }
    832  1.1  christos   else
    833  1.1  christos     qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
    834  1.1  christos 
    835  1.1  christos   /* Open the output file. */
    836  1.1  christos 
    837  1.1  christos   if (outfile)
    838  1.1  christos     {
    839  1.1  christos       ostream = fopen (outfile, "w");
    840  1.1  christos       if (!ostream)
    841  1.1  christos         pfatal_with_name (outfile);
    842  1.1  christos     }
    843  1.1  christos 
    844  1.1  christos   writelines (linearray, nextline - linearray, ostream);
    845  1.1  christos   if (outfile)
    846  1.1  christos     fclose (ostream);
    847  1.1  christos 
    848  1.1  christos   free (linearray);
    849  1.1  christos   free (data);
    850  1.1  christos }
    851  1.1  christos 
    852  1.1  christos /* Parse an input string in core into lines.
    854  1.1  christos    DATA is the input string, and SIZE is its length.
    855  1.1  christos    Data goes in LINEARRAY starting at NEXTLINE.
    856  1.1  christos    The value returned is the first entry in LINEARRAY still unused.
    857  1.1  christos    Value 0 means input file contents are invalid.  */
    858  1.1  christos 
    859  1.1  christos char **
    860  1.1  christos parsefile (char *filename, char **nextline, char *data, long int size)
    861  1.1  christos {
    862  1.1  christos   char *p, *end;
    863  1.1  christos   char **line = nextline;
    864  1.1  christos 
    865  1.1  christos   p = data;
    866  1.1  christos   end = p + size;
    867  1.1  christos   *end = 0;
    868  1.1  christos 
    869  1.1  christos   while (p != end)
    870  1.1  christos     {
    871  1.1  christos       if (p[0] != '\\' && p[0] != '@')
    872  1.1  christos         return 0;
    873  1.1  christos 
    874  1.1  christos       *line = p;
    875  1.1  christos 
    876  1.1  christos       /* Find the first letter of the first field of this line.  If it
    877  1.1  christos          is different from the first letter of the first field of the
    878  1.1  christos          first line, we need initial headers in the output index.  */
    879  1.1  christos       while (*p && *p != '{')
    880  1.1  christos         p++;
    881  1.1  christos       if (p == end)
    882  1.1  christos         return 0;
    883  1.1  christos       p++;
    884  1.1  christos       if (first_initial)
    885  1.1  christos         {
    886  1.1  christos           if (first_initial != toupper (*p))
    887  1.1  christos             need_initials = 1;
    888  1.1  christos         }
    889  1.1  christos       else
    890  1.1  christos         first_initial = toupper (*p);
    891  1.1  christos 
    892  1.1  christos       while (*p && *p != '\n')
    893  1.1  christos         p++;
    894  1.1  christos       if (p != end)
    895  1.1  christos         p++;
    896  1.1  christos 
    897  1.1  christos       line++;
    898  1.1  christos       if (line == linearray + nlines)
    899  1.1  christos         {
    900  1.1  christos           char **old = linearray;
    901  1.1  christos           linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
    902  1.1  christos           line += linearray - old;
    903  1.1  christos         }
    904  1.1  christos     }
    905  1.1  christos 
    906  1.1  christos   return line;
    907  1.1  christos }
    908  1.1  christos 
/* Indexification is a filter applied to the sorted lines
   as they are being written to the output file.
   Multiple entries for the same name, with different page numbers,
   get combined into a single entry with multiple page numbers.
   The first braced field, which is used for sorting, is discarded.
   However, its first character is examined, folded to upper case,
   and if it is different from that in the previous line fed to us
   an \initial line is written with one argument, the new initial.

   If an entry has four braced fields, then the third and fourth
   (those following the sort key and the page number)
   constitute primary and secondary names.
   In this case, each change of primary name
   generates a \primary line which contains only the primary name,
   and in between these are \secondary lines which contain
   just a secondary name and page numbers. */
    925  1.1  christos 
/* Text of the primary name most recently written out.
   When only one level of indexing is in use, this is simply the last
   name seen. */
char *lastprimary;
/* Number of characters lastprimary has room for, not counting the
   terminating NUL. */
int lastprimarylength;

/* The same pair, for the secondary name. */
char *lastsecondary;
int lastsecondarylength;

/* State of the entry being written:
   0 -- no entry is open;
   1 -- the beginning of an entry has been written, but no page
        number yet;
   more than 1 -- the beginning of an entry and at least one page
        number have been written. */
int pending;

/* The initial (for sorting purposes) of the last primary entry written.
   An \initial {c} line is written whenever it changes. */

char *lastinitial;

int lastinitiallength;

/* Storage for the value of lastinitial when it is a string of
   length 1.  */

char lastinitial1[2];

/* Reset the state above before an index is written: no entry open,
   an empty initial, and freshly allocated, zero-filled buffers for the
   primary and secondary names. */

void
init_index (void)
{
  /* Nothing has been written yet.  */
  pending = 0;

  /* The last initial starts out as the empty string.  */
  lastinitial1[0] = lastinitial1[1] = 0;
  lastinitial = lastinitial1;
  lastinitiallength = 0;

  /* Both name buffers start with room for 100 characters and a NUL.  */
  lastprimarylength = 100;
  lastsecondarylength = 100;

  lastprimary = (char *) xmalloc (lastprimarylength + 1);
  memset (lastprimary, '\0', lastprimarylength + 1);

  lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
  memset (lastsecondary, '\0', lastsecondarylength + 1);
}
    972  1.1  christos 
    973  1.1  christos /* Indexify.  Merge entries for the same name,
    974  1.1  christos    insert headers for each initial character, etc.  */
    975  1.1  christos 
    976  1.1  christos void
    977  1.1  christos indexify (char *line, FILE *ostream)
    978  1.1  christos {
    979  1.1  christos   char *primary, *secondary, *pagenumber;
    980  1.1  christos   int primarylength, secondarylength = 0, pagelength;
    981  1.1  christos   int nosecondary;
    982  1.1  christos   int initiallength;
    983  1.1  christos   char *initial;
    984  1.1  christos   char initial1[2];
    985  1.1  christos   register char *p;
    986  1.1  christos 
    987  1.1  christos   /* First, analyze the parts of the entry fed to us this time. */
    988  1.1  christos 
    989  1.1  christos   p = find_braced_pos (line, 0, 0, 0);
    990  1.1  christos   if (*p == '{')
    991  1.1  christos     {
    992  1.1  christos       initial = p;
    993  1.1  christos       /* Get length of inner pair of braces starting at `p',
    994  1.1  christos          including that inner pair of braces.  */
    995  1.1  christos       initiallength = find_braced_end (p + 1) + 1 - p;
    996  1.1  christos     }
    997  1.1  christos   else
    998  1.1  christos     {
    999  1.1  christos       initial = initial1;
   1000  1.1  christos       initial1[0] = toupper (*p);
   1001  1.1  christos       initial1[1] = 0;
   1002  1.1  christos       initiallength = 1;
   1003  1.1  christos     }
   1004  1.1  christos 
   1005  1.1  christos   pagenumber = find_braced_pos (line, 1, 0, 0);
   1006  1.1  christos   pagelength = find_braced_end (pagenumber) - pagenumber;
   1007  1.1  christos   if (pagelength == 0)
   1008  1.1  christos     fatal (_("No page number in %s"), line);
   1009  1.1  christos 
   1010  1.1  christos   primary = find_braced_pos (line, 2, 0, 0);
   1011  1.1  christos   primarylength = find_braced_end (primary) - primary;
   1012  1.1  christos 
   1013  1.1  christos   secondary = find_braced_pos (line, 3, 0, 0);
   1014  1.1  christos   nosecondary = !*secondary;
   1015  1.1  christos   if (!nosecondary)
   1016  1.1  christos     secondarylength = find_braced_end (secondary) - secondary;
   1017  1.1  christos 
   1018  1.1  christos   /* If the primary is different from before, make a new primary entry. */
   1019  1.1  christos   if (strncmp (primary, lastprimary, primarylength))
   1020  1.1  christos     {
   1021  1.1  christos       /* Close off current secondary entry first, if one is open. */
   1022  1.1  christos       if (pending)
   1023  1.1  christos         {
   1024  1.1  christos           fputs ("}\n", ostream);
   1025  1.1  christos           pending = 0;
   1026  1.1  christos         }
   1027  1.1  christos 
   1028  1.1  christos       /* If this primary has a different initial, include an entry for
   1029  1.1  christos          the initial. */
   1030  1.1  christos       if (need_initials &&
   1031  1.1  christos           (initiallength != lastinitiallength ||
   1032  1.1  christos            strncmp (initial, lastinitial, initiallength)))
   1033  1.1  christos         {
   1034  1.1  christos           fprintf (ostream, "\\initial {");
   1035  1.1  christos           fwrite (initial, 1, initiallength, ostream);
   1036  1.1  christos           fputs ("}\n", ostream);
   1037  1.1  christos           if (initial == initial1)
   1038  1.1  christos             {
   1039  1.1  christos               lastinitial = lastinitial1;
   1040  1.1  christos               *lastinitial1 = *initial1;
   1041  1.1  christos             }
   1042  1.1  christos           else
   1043  1.1  christos             {
   1044  1.1  christos               lastinitial = initial;
   1045  1.1  christos             }
   1046  1.1  christos           lastinitiallength = initiallength;
   1047  1.1  christos         }
   1048  1.1  christos 
   1049  1.1  christos       /* Make the entry for the primary.  */
   1050  1.1  christos       if (nosecondary)
   1051  1.1  christos         fputs ("\\entry {", ostream);
   1052  1.1  christos       else
   1053  1.1  christos         fputs ("\\primary {", ostream);
   1054  1.1  christos       fwrite (primary, primarylength, 1, ostream);
   1055  1.1  christos       if (nosecondary)
   1056  1.1  christos         {
   1057  1.1  christos           fputs ("}{", ostream);
   1058  1.1  christos           pending = 1;
   1059  1.1  christos         }
   1060  1.1  christos       else
   1061  1.1  christos         fputs ("}\n", ostream);
   1062  1.1  christos 
   1063  1.1  christos       /* Record name of most recent primary. */
   1064  1.1  christos       if (lastprimarylength < primarylength)
   1065  1.1  christos         {
   1066  1.1  christos           lastprimarylength = primarylength + 100;
   1067  1.1  christos           lastprimary = (char *) xrealloc (lastprimary,
   1068  1.1  christos                                            1 + lastprimarylength);
   1069  1.1  christos         }
   1070  1.1  christos       strncpy (lastprimary, primary, primarylength);
   1071  1.1  christos       lastprimary[primarylength] = 0;
   1072  1.1  christos 
   1073  1.1  christos       /* There is no current secondary within this primary, now. */
   1074  1.1  christos       lastsecondary[0] = 0;
   1075  1.1  christos     }
   1076  1.1  christos 
   1077  1.1  christos   /* Should not have an entry with no subtopic following one with a
   1078  1.1  christos      subtopic. */
   1079  1.1  christos 
   1080  1.1  christos   if (nosecondary && *lastsecondary)
   1081  1.1  christos     error (_("entry %s follows an entry with a secondary name"), line);
   1082  1.1  christos 
   1083  1.1  christos   /* Start a new secondary entry if necessary. */
   1084  1.1  christos   if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
   1085  1.1  christos     {
   1086  1.1  christos       if (pending)
   1087  1.1  christos         {
   1088  1.1  christos           fputs ("}\n", ostream);
   1089  1.1  christos           pending = 0;
   1090  1.1  christos         }
   1091  1.1  christos 
   1092  1.1  christos       /* Write the entry for the secondary.  */
   1093  1.1  christos       fputs ("\\secondary {", ostream);
   1094  1.1  christos       fwrite (secondary, secondarylength, 1, ostream);
   1095  1.1  christos       fputs ("}{", ostream);
   1096  1.1  christos       pending = 1;
   1097  1.1  christos 
   1098  1.1  christos       /* Record name of most recent secondary. */
   1099  1.1  christos       if (lastsecondarylength < secondarylength)
   1100  1.1  christos         {
   1101  1.1  christos           lastsecondarylength = secondarylength + 100;
   1102  1.1  christos           lastsecondary = (char *) xrealloc (lastsecondary,
   1103  1.1  christos                                              1 + lastsecondarylength);
   1104  1.1  christos         }
   1105  1.1  christos       strncpy (lastsecondary, secondary, secondarylength);
   1106  1.1  christos       lastsecondary[secondarylength] = 0;
   1107  1.1  christos     }
   1108  1.1  christos 
   1109  1.1  christos   /* Here to add one more page number to the current entry. */
   1110  1.1  christos   if (pending++ != 1)
   1111  1.1  christos     fputs (", ", ostream);  /* Punctuate first, if this is not the first. */
   1112  1.1  christos   fwrite (pagenumber, pagelength, 1, ostream);
   1113  1.1  christos }
   1114  1.1  christos 
   1115  1.1  christos /* Close out any unfinished output entry. */
   1116  1.1  christos 
   1117  1.1  christos void
   1118  1.1  christos finish_index (FILE *ostream)
   1119  1.1  christos {
   1120  1.1  christos   if (pending)
   1121  1.1  christos     fputs ("}\n", ostream);
   1122  1.1  christos   free (lastprimary);
   1123  1.1  christos   free (lastsecondary);
   1124  1.1  christos }
   1125  1.1  christos 
   1126  1.1  christos /* Copy the lines in the sorted order.
   1128  1.1  christos    Each line is copied out of the input file it was found in. */
   1129  1.1  christos 
   1130  1.1  christos void
   1131  1.1  christos writelines (char **linearray, int nlines, FILE *ostream)
   1132  1.1  christos {
   1133  1.1  christos   char **stop_line = linearray + nlines;
   1134  1.1  christos   char **next_line;
   1135  1.2  christos 
   1136  1.1  christos   init_index ();
   1137  1.1  christos 
   1138  1.1  christos   /* Output the text of the lines, and free the buffer space. */
   1139  1.1  christos 
   1140  1.1  christos   for (next_line = linearray; next_line != stop_line; next_line++)
   1141  1.1  christos     {
   1142  1.1  christos       /* Output the line only if distinct from previous one.  */
   1143  1.1  christos       if (next_line == linearray
   1144  1.1  christos       /* Compare previous line with this one, using only the
   1145  1.1  christos          explicitly specd keyfields. */
   1146  1.1  christos           || compare_general (*(next_line - 1), *next_line, 0L, 0L,
   1147  1.1  christos                               num_keyfields - 1))
   1148  1.1  christos         {
   1149  1.1  christos           char *p = *next_line;
   1150  1.1  christos           char c;
   1151  1.1  christos 
   1152  1.1  christos           while ((c = *p++) && c != '\n')
   1153  1.1  christos             /* Do nothing. */ ;
   1154  1.1  christos           *(p - 1) = 0;
   1155  1.1  christos           indexify (*next_line, ostream);
   1156  1.1  christos         }
   1157  1.1  christos     }
   1158  1.1  christos 
   1159  1.1  christos   finish_index (ostream);
   1160  1.1  christos }
   1161  1.1  christos 
/* Report an error the way `error' does, with FORMAT as the printf
   control string and ARG as its argument, then leave the program
   through xexit with status 1.  */

void
fatal (const char *format, const char *arg)
{
  error (format, arg);
  xexit (1);
}
   1171  1.1  christos 
   1172  1.1  christos /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
   1173  1.1  christos void
   1174  1.1  christos error (const char *format, const char *arg)
   1175  1.1  christos {
   1176  1.1  christos   printf ("%s: ", program_name);
   1177  1.1  christos   printf (format, arg);
   1178  1.1  christos   if (format[strlen (format) -1] != '\n')
   1179  1.1  christos     printf ("\n");
   1180  1.1  christos }
   1181  1.1  christos 
   1182  1.1  christos void
   1183  1.1  christos perror_with_name (const char *name)
   1184  1.1  christos {
   1185  1.1  christos   fprintf (stderr, "%s: ", program_name);
   1186  1.1  christos   perror (name);
   1187  1.1  christos }
   1188  1.1  christos 
/* Report the pending system error for NAME through perror_with_name,
   then leave the program through xexit with status 1.  */
void
pfatal_with_name (const char *name)
{
  perror_with_name (name);
  xexit (1);
}
   1195  1.2  christos 
   1196  1.1  christos 
/* Return a newly-allocated string concatenating S1, S2, and S3.
   The storage comes from xmalloc; releasing it is up to the caller.  */

static char *
concat3 (const char *s1, const char *s2, const char *s3)
{
  /* strlen yields a size_t; keeping the lengths in an int could
     truncate them.  */
  size_t len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
  char *result = (char *) xmalloc (len1 + len2 + len3 + 1);

  memcpy (result, s1, len1);
  memcpy (result + len1, s2, len2);
  /* Copy the terminating NUL of S3 along with its text.  */
  memcpy (result + len1 + len2, s3, len3 + 1);

  return result;
}
   1213