Home | History | Annotate | Line # | Download | only in util
texindex.c revision 1.1
      1  1.1  christos /*	$NetBSD: texindex.c,v 1.1 2016/01/14 00:11:29 christos Exp $	*/
      2  1.1  christos 
      3  1.1  christos /* texindex -- sort TeX index dribble output into an actual index.
      4  1.1  christos    Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp
      5  1.1  christos 
      6  1.1  christos    Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
      7  1.1  christos    2002, 2003, 2004 Free Software Foundation, Inc.
      8  1.1  christos 
      9  1.1  christos    This program is free software; you can redistribute it and/or modify
     10  1.1  christos    it under the terms of the GNU General Public License as published by
     11  1.1  christos    the Free Software Foundation; either version 2, or (at your option)
     12  1.1  christos    any later version.
     13  1.1  christos 
     14  1.1  christos    This program is distributed in the hope that it will be useful,
     15  1.1  christos    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16  1.1  christos    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17  1.1  christos    GNU General Public License for more details.
     18  1.1  christos 
     19  1.1  christos    You should have received a copy of the GNU General Public License
     20  1.1  christos    along with this program; if not, write to the Free Software
     21  1.1  christos    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
     22  1.1  christos 
     23  1.1  christos #include "system.h"
     24  1.1  christos #include <getopt.h>
     25  1.1  christos 
     26  1.1  christos static char *program_name = "texindex";
     27  1.1  christos 
     28  1.1  christos #if defined (emacs)
     29  1.1  christos #  include "../src/config.h"
     30  1.1  christos /* Some s/os.h files redefine these. */
     31  1.1  christos #  undef read
     32  1.1  christos #  undef close
     33  1.1  christos #  undef write
     34  1.1  christos #  undef open
     35  1.1  christos #endif
     36  1.1  christos 
     37  1.1  christos #if !defined (HAVE_MEMSET)
     38  1.1  christos #undef memset
     39  1.1  christos #define memset(ptr, ignore, count) bzero (ptr, count)
     40  1.1  christos #endif
     41  1.1  christos 
     42  1.1  christos char *mktemp (char *);
     43  1.1  christos 
     44  1.1  christos #if !defined (SEEK_SET)
     45  1.1  christos #  define SEEK_SET 0
     46  1.1  christos #  define SEEK_CUR 1
     47  1.1  christos #  define SEEK_END 2
     48  1.1  christos #endif /* !SEEK_SET */
     49  1.1  christos 
     50  1.1  christos struct linebuffer;
     51  1.1  christos 
     52  1.1  christos /* When sorting in core, this structure describes one line
     53  1.1  christos    and the position and length of its first keyfield.  */
     54  1.1  christos struct lineinfo
     55  1.1  christos {
     56  1.1  christos   char *text;           /* The actual text of the line. */
     57  1.1  christos   union {
     58  1.1  christos     char *text;         /* The start of the key (for textual comparison). */
     59  1.1  christos     long number;        /* The numeric value (for numeric comparison). */
     60  1.1  christos   } key;
     61  1.1  christos   long keylen;          /* Length of KEY field. */
     62  1.1  christos };
     63  1.1  christos 
     64  1.1  christos /* This structure describes a field to use as a sort key. */
     65  1.1  christos struct keyfield
     66  1.1  christos {
     67  1.1  christos   int startwords;       /* Number of words to skip. */
     68  1.1  christos   int startchars;       /* Number of additional chars to skip. */
     69  1.1  christos   int endwords;         /* Number of words to ignore at end. */
     70  1.1  christos   int endchars;         /* Ditto for characters of last word. */
     71  1.1  christos   char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */
     72  1.1  christos   char fold_case;       /* Non-zero means case doesn't matter. */
     73  1.1  christos   char reverse;         /* Non-zero means compare in reverse order. */
     74  1.1  christos   char numeric;         /* Non-zeros means field is ASCII numeric. */
     75  1.1  christos   char positional;      /* Sort according to file position. */
     76  1.1  christos   char braced;          /* Count balanced-braced groupings as fields. */
     77  1.1  christos };
     78  1.1  christos 
     79  1.1  christos /* Vector of keyfields to use. */
     80  1.1  christos struct keyfield keyfields[3];
     81  1.1  christos 
     82  1.1  christos /* Number of keyfields stored in that vector.  */
     83  1.1  christos int num_keyfields = 3;
     84  1.1  christos 
     85  1.1  christos /* Vector of input file names, terminated with a null pointer. */
     86  1.1  christos char **infiles;
     87  1.1  christos 
     88  1.1  christos /* Vector of corresponding output file names, or NULL, meaning default it
     89  1.1  christos    (add an `s' to the end). */
     90  1.1  christos char **outfiles;
     91  1.1  christos 
     92  1.1  christos /* Length of `infiles'. */
     93  1.1  christos int num_infiles;
     94  1.1  christos 
     95  1.1  christos /* Pointer to the array of pointers to lines being sorted. */
     96  1.1  christos char **linearray;
     97  1.1  christos 
     98  1.1  christos /* The allocated length of `linearray'. */
     99  1.1  christos long nlines;
    100  1.1  christos 
    101  1.1  christos /* Directory to use for temporary files.  On Unix, it ends with a slash.  */
    102  1.1  christos char *tempdir;
    103  1.1  christos 
    104  1.1  christos /* Number of last temporary file.  */
    105  1.1  christos int tempcount;
    106  1.1  christos 
    107  1.1  christos /* Number of last temporary file already deleted.
    108  1.1  christos    Temporary files are deleted by `flush_tempfiles' in order of creation.  */
    109  1.1  christos int last_deleted_tempcount;
    110  1.1  christos 
    111  1.1  christos /* During in-core sort, this points to the base of the data block
    112  1.1  christos    which contains all the lines of data.  */
    113  1.1  christos char *text_base;
    114  1.1  christos 
    115  1.1  christos /* Initially 0; changed to 1 if we want initials in this index.  */
    116  1.1  christos int need_initials;
    117  1.1  christos 
    118  1.1  christos /* Remembers the first initial letter seen in this index, so we can
    119  1.1  christos    determine whether we need initials in the sorted form.  */
    120  1.1  christos char first_initial;
    121  1.1  christos 
    122  1.1  christos /* Additional command switches .*/
    123  1.1  christos 
    124  1.1  christos /* Nonzero means do not delete tempfiles -- for debugging. */
    125  1.1  christos int keep_tempfiles;
    126  1.1  christos 
    127  1.1  christos /* Forward declarations of functions in this file. */
    128  1.1  christos void decode_command (int argc, char **argv);
    129  1.1  christos void sort_in_core (char *infile, int total, char *outfile);
    130  1.1  christos void sort_offline (char *infile, off_t total, char *outfile);
    131  1.1  christos char **parsefile (char *filename, char **nextline, char *data, long int size);
    132  1.1  christos char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
    133  1.1  christos char *find_pos (char *str, int words, int chars, int ignore_blanks);
    134  1.1  christos long find_value (char *start, long int length);
    135  1.1  christos char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
    136  1.1  christos char *find_braced_end (char *str);
    137  1.1  christos void writelines (char **linearray, int nlines, FILE *ostream);
    138  1.1  christos int compare_field (struct keyfield *keyfield, char *start1,
    139  1.1  christos                    long int length1, long int pos1, char *start2,
    140  1.1  christos                    long int length2, long int pos2);
    141  1.1  christos int compare_full (const void *, const void *);
    142  1.1  christos long readline (struct linebuffer *linebuffer, FILE *stream);
    143  1.1  christos int merge_files (char **infiles, int nfiles, char *outfile);
    144  1.1  christos int merge_direct (char **infiles, int nfiles, char *outfile);
    145  1.1  christos void pfatal_with_name (const char *name);
    146  1.1  christos void fatal (const char *format, const char *arg);
    147  1.1  christos void error (const char *format, const char *arg);
    148  1.1  christos void *xmalloc (), *xrealloc ();
    149  1.1  christos char *concat (char *s1, char *s2);
    150  1.1  christos void flush_tempfiles (int to_count);
    151  1.1  christos 
    152  1.1  christos #define MAX_IN_CORE_SORT 500000
    154  1.1  christos 
    155  1.1  christos int
    156  1.1  christos main (int argc, char **argv)
    157  1.1  christos {
    158  1.1  christos   int i;
    159  1.1  christos 
    160  1.1  christos   tempcount = 0;
    161  1.1  christos   last_deleted_tempcount = 0;
    162  1.1  christos 
    163  1.1  christos #ifdef HAVE_SETLOCALE
    164  1.1  christos   /* Set locale via LC_ALL.  */
    165  1.1  christos   setlocale (LC_ALL, "");
    166  1.1  christos #endif
    167  1.1  christos 
    168  1.1  christos   /* Set the text message domain.  */
    169  1.1  christos   bindtextdomain (PACKAGE, LOCALEDIR);
    170  1.1  christos   textdomain (PACKAGE);
    171  1.1  christos 
    172  1.1  christos   /* In case we write to a redirected stdout that fails.  */
    173  1.1  christos   /* not ready atexit (close_stdout); */
    174  1.1  christos 
    175  1.1  christos   /* Describe the kind of sorting to do. */
    176  1.1  christos   /* The first keyfield uses the first braced field and folds case. */
    177  1.1  christos   keyfields[0].braced = 1;
    178  1.1  christos   keyfields[0].fold_case = 1;
    179  1.1  christos   keyfields[0].endwords = -1;
    180  1.1  christos   keyfields[0].endchars = -1;
    181  1.1  christos 
    182  1.1  christos   /* The second keyfield uses the second braced field, numerically. */
    183  1.1  christos   keyfields[1].braced = 1;
    184  1.1  christos   keyfields[1].numeric = 1;
    185  1.1  christos   keyfields[1].startwords = 1;
    186  1.1  christos   keyfields[1].endwords = -1;
    187  1.1  christos   keyfields[1].endchars = -1;
    188  1.1  christos 
    189  1.1  christos   /* The third keyfield (which is ignored while discarding duplicates)
    190  1.1  christos      compares the whole line. */
    191  1.1  christos   keyfields[2].endwords = -1;
    192  1.1  christos   keyfields[2].endchars = -1;
    193  1.1  christos 
    194  1.1  christos   decode_command (argc, argv);
    195  1.1  christos 
    196  1.1  christos   /* Process input files completely, one by one.  */
    197  1.1  christos 
    198  1.1  christos   for (i = 0; i < num_infiles; i++)
    199  1.1  christos     {
    200  1.1  christos       int desc;
    201  1.1  christos       off_t ptr;
    202  1.1  christos       char *outfile;
    203  1.1  christos       struct stat instat;
    204  1.1  christos 
    205  1.1  christos       desc = open (infiles[i], O_RDONLY, 0);
    206  1.1  christos       if (desc < 0)
    207  1.1  christos         pfatal_with_name (infiles[i]);
    208  1.1  christos 
    209  1.1  christos       if (stat (infiles[i], &instat))
    210  1.1  christos         pfatal_with_name (infiles[i]);
    211  1.1  christos       if (S_ISDIR (instat.st_mode))
    212  1.1  christos         {
    213  1.1  christos #ifdef EISDIR
    214  1.1  christos           errno = EISDIR;
    215  1.1  christos #endif
    216  1.1  christos           pfatal_with_name (infiles[i]);
    217  1.1  christos         }
    218  1.1  christos 
    219  1.1  christos       lseek (desc, (off_t) 0, SEEK_END);
    220  1.1  christos       ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
    221  1.1  christos 
    222  1.1  christos       close (desc);
    223  1.1  christos 
    224  1.1  christos       outfile = outfiles[i];
    225  1.1  christos       if (!outfile)
    226  1.1  christos         outfile = concat (infiles[i], "s");
    227  1.1  christos 
    228  1.1  christos       need_initials = 0;
    229  1.1  christos       first_initial = '\0';
    230  1.1  christos 
    231  1.1  christos       if (ptr < MAX_IN_CORE_SORT)
    232  1.1  christos         /* Sort a small amount of data. */
    233  1.1  christos         sort_in_core (infiles[i], (int)ptr, outfile);
    234  1.1  christos       else
    235  1.1  christos         sort_offline (infiles[i], ptr, outfile);
    236  1.1  christos     }
    237  1.1  christos 
    238  1.1  christos   flush_tempfiles (tempcount);
    239  1.1  christos   xexit (0);
    240  1.1  christos   return 0; /* Avoid bogus warnings.  */
    241  1.1  christos }
    242  1.1  christos 
    243  1.1  christos typedef struct
    245  1.1  christos {
    246  1.1  christos   char *long_name;
    247  1.1  christos   char *short_name;
    248  1.1  christos   int *variable_ref;
    249  1.1  christos   int variable_value;
    250  1.1  christos   char *arg_name;
    251  1.1  christos   char *doc_string;
    252  1.1  christos } TEXINDEX_OPTION;
    253  1.1  christos 
    254  1.1  christos TEXINDEX_OPTION texindex_options[] = {
    255  1.1  christos   { "--help", "-h", (int *)NULL, 0, (char *)NULL,
    256  1.1  christos       N_("display this help and exit") },
    257  1.1  christos   { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
    258  1.1  christos       N_("keep temporary files around after processing") },
    259  1.1  christos   { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
    260  1.1  christos       N_("do not keep temporary files around after processing (default)") },
    261  1.1  christos   { "--output", "-o", (int *)NULL, 0, "FILE",
    262  1.1  christos       N_("send output to FILE") },
    263  1.1  christos   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
    264  1.1  christos       N_("display version information and exit") },
    265  1.1  christos   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
    266  1.1  christos };
    267  1.1  christos 
    268  1.1  christos void
    269  1.1  christos usage (int result_value)
    270  1.1  christos {
    271  1.1  christos   register int i;
    272  1.1  christos   FILE *f = result_value ? stderr : stdout;
    273  1.1  christos 
    274  1.1  christos   fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
    275  1.1  christos   fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
    276  1.1  christos   /* Avoid trigraph nonsense.  */
    277  1.1  christos   fprintf (f,
    278  1.1  christos _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
    279  1.1  christos            '?', '?'); /* avoid trigraph in cat-id-tbl.c */
    280  1.1  christos   fprintf (f, _("\nOptions:\n"));
    281  1.1  christos 
    282  1.1  christos   for (i = 0; texindex_options[i].long_name; i++)
    283  1.1  christos     {
    284  1.1  christos       putc (' ', f);
    285  1.1  christos 
    286  1.1  christos       if (texindex_options[i].short_name)
    287  1.1  christos         fprintf (f, "%s, ", texindex_options[i].short_name);
    288  1.1  christos 
    289  1.1  christos       fprintf (f, "%s %s",
    290  1.1  christos                texindex_options[i].long_name,
    291  1.1  christos                texindex_options[i].arg_name
    292  1.1  christos                ? texindex_options[i].arg_name : "");
    293  1.1  christos 
    294  1.1  christos       fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
    295  1.1  christos     }
    296  1.1  christos   fputs (_("\n\
    297  1.1  christos Email bug reports to bug-texinfo (at) gnu.org,\n\
    298  1.1  christos general questions and discussion to help-texinfo (at) gnu.org.\n\
    299  1.1  christos Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
    300  1.1  christos   fputs ("\n", f);
    301  1.1  christos 
    302  1.1  christos   xexit (result_value);
    303  1.1  christos }
    304  1.1  christos 
    305  1.1  christos /* Decode the command line arguments to set the parameter variables
    306  1.1  christos    and set up the vector of keyfields and the vector of input files. */
    307  1.1  christos 
    308  1.1  christos void
    309  1.1  christos decode_command (int argc, char **argv)
    310  1.1  christos {
    311  1.1  christos   int arg_index = 1;
    312  1.1  christos   char **ip;
    313  1.1  christos   char **op;
    314  1.1  christos 
    315  1.1  christos   /* Store default values into parameter variables. */
    316  1.1  christos 
    317  1.1  christos   tempdir = getenv ("TMPDIR");
    318  1.1  christos   if (tempdir == NULL)
    319  1.1  christos     tempdir = getenv ("TEMP");
    320  1.1  christos   if (tempdir == NULL)
    321  1.1  christos     tempdir = getenv ("TMP");
    322  1.1  christos   if (tempdir == NULL)
    323  1.1  christos     tempdir = DEFAULT_TMPDIR;
    324  1.1  christos   else
    325  1.1  christos     tempdir = concat (tempdir, "/");
    326  1.1  christos 
    327  1.1  christos   keep_tempfiles = 0;
    328  1.1  christos 
    329  1.1  christos   /* Allocate ARGC input files, which must be enough.  */
    330  1.1  christos 
    331  1.1  christos   infiles = (char **) xmalloc (argc * sizeof (char *));
    332  1.1  christos   outfiles = (char **) xmalloc (argc * sizeof (char *));
    333  1.1  christos   ip = infiles;
    334  1.1  christos   op = outfiles;
    335  1.1  christos 
    336  1.1  christos   while (arg_index < argc)
    337  1.1  christos     {
    338  1.1  christos       char *arg = argv[arg_index++];
    339  1.1  christos 
    340  1.1  christos       if (*arg == '-')
    341  1.1  christos         {
    342  1.1  christos           if (strcmp (arg, "--version") == 0)
    343  1.1  christos             {
    344  1.1  christos               printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
    345  1.1  christos               puts ("");
    346  1.1  christos               puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
    347  1.1  christos               printf (_("There is NO warranty.  You may redistribute this software\n\
    348  1.1  christos under the terms of the GNU General Public License.\n\
    349  1.1  christos For more information about these matters, see the files named COPYING.\n"));
    350  1.1  christos               xexit (0);
    351  1.1  christos             }
    352  1.1  christos           else if ((strcmp (arg, "--keep") == 0) ||
    353  1.1  christos                    (strcmp (arg, "-k") == 0))
    354  1.1  christos             {
    355  1.1  christos               keep_tempfiles = 1;
    356  1.1  christos             }
    357  1.1  christos           else if ((strcmp (arg, "--help") == 0) ||
    358  1.1  christos                    (strcmp (arg, "-h") == 0))
    359  1.1  christos             {
    360  1.1  christos               usage (0);
    361  1.1  christos             }
    362  1.1  christos           else if ((strcmp (arg, "--output") == 0) ||
    363  1.1  christos                    (strcmp (arg, "-o") == 0))
    364  1.1  christos             {
    365  1.1  christos               if (argv[arg_index] != (char *)NULL)
    366  1.1  christos                 {
    367  1.1  christos                   arg_index++;
    368  1.1  christos                   if (op > outfiles)
    369  1.1  christos                     *(op - 1) = argv[arg_index];
    370  1.1  christos                 }
    371  1.1  christos               else
    372  1.1  christos                 usage (1);
    373  1.1  christos             }
    374  1.1  christos           else
    375  1.1  christos             usage (1);
    376  1.1  christos         }
    377  1.1  christos       else
    378  1.1  christos         {
    379  1.1  christos           *ip++ = arg;
    380  1.1  christos           *op++ = (char *)NULL;
    381  1.1  christos         }
    382  1.1  christos     }
    383  1.1  christos 
    384  1.1  christos   /* Record number of keyfields and terminate list of filenames. */
    385  1.1  christos   num_infiles = ip - infiles;
    386  1.1  christos   *ip = (char *)NULL;
    387  1.1  christos   if (num_infiles == 0)
    388  1.1  christos     usage (1);
    389  1.1  christos }
    390  1.1  christos 
    391  1.1  christos /* Return a name for temporary file COUNT. */
    393  1.1  christos 
    394  1.1  christos static char *
    395  1.1  christos maketempname (int count)
    396  1.1  christos {
    397  1.1  christos   static char *tempbase = NULL;
    398  1.1  christos   char tempsuffix[10];
    399  1.1  christos 
    400  1.1  christos   if (!tempbase)
    401  1.1  christos     {
    402  1.1  christos       int fd;
    403  1.1  christos       tempbase = concat (tempdir, "txidxXXXXXX");
    404  1.1  christos 
    405  1.1  christos       fd = mkstemp (tempbase);
    406  1.1  christos       if (fd == -1)
    407  1.1  christos         pfatal_with_name (tempbase);
    408  1.1  christos     }
    409  1.1  christos 
    410  1.1  christos   sprintf (tempsuffix, ".%d", count);
    411  1.1  christos   return concat (tempbase, tempsuffix);
    412  1.1  christos }
    413  1.1  christos 
    414  1.1  christos 
    415  1.1  christos /* Delete all temporary files up to TO_COUNT. */
    416  1.1  christos 
    417  1.1  christos void
    418  1.1  christos flush_tempfiles (int to_count)
    419  1.1  christos {
    420  1.1  christos   if (keep_tempfiles)
    421  1.1  christos     return;
    422  1.1  christos   while (last_deleted_tempcount < to_count)
    423  1.1  christos     unlink (maketempname (++last_deleted_tempcount));
    424  1.1  christos }
    425  1.1  christos 
    426  1.1  christos 
    427  1.1  christos /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
    429  1.1  christos 
    430  1.1  christos int
    431  1.1  christos compare_full (const void *p1, const void *p2)
    432  1.1  christos {
    433  1.1  christos   char **line1 = (char **) p1;
    434  1.1  christos   char **line2 = (char **) p2;
    435  1.1  christos   int i;
    436  1.1  christos 
    437  1.1  christos   /* Compare using the first keyfield;
    438  1.1  christos      if that does not distinguish the lines, try the second keyfield;
    439  1.1  christos      and so on. */
    440  1.1  christos 
    441  1.1  christos   for (i = 0; i < num_keyfields; i++)
    442  1.1  christos     {
    443  1.1  christos       long length1, length2;
    444  1.1  christos       char *start1 = find_field (&keyfields[i], *line1, &length1);
    445  1.1  christos       char *start2 = find_field (&keyfields[i], *line2, &length2);
    446  1.1  christos       int tem = compare_field (&keyfields[i], start1, length1,
    447  1.1  christos                                *line1 - text_base,
    448  1.1  christos                                start2, length2, *line2 - text_base);
    449  1.1  christos       if (tem)
    450  1.1  christos         {
    451  1.1  christos           if (keyfields[i].reverse)
    452  1.1  christos             return -tem;
    453  1.1  christos           return tem;
    454  1.1  christos         }
    455  1.1  christos     }
    456  1.1  christos 
    457  1.1  christos   return 0;                     /* Lines match exactly. */
    458  1.1  christos }
    459  1.1  christos 
    460  1.1  christos /* Compare LINE1 and LINE2, described by structures
    461  1.1  christos    in which the first keyfield is identified in advance.
    462  1.1  christos    For positional sorting, assumes that the order of the lines in core
    463  1.1  christos    reflects their nominal order.  */
    464  1.1  christos int
    465  1.1  christos compare_prepared (const void *p1, const void *p2)
    466  1.1  christos {
    467  1.1  christos   struct lineinfo *line1 = (struct lineinfo *) p1;
    468  1.1  christos   struct lineinfo *line2 = (struct lineinfo *) p2;
    469  1.1  christos   int i;
    470  1.1  christos   int tem;
    471  1.1  christos   char *text1, *text2;
    472  1.1  christos 
    473  1.1  christos   /* Compare using the first keyfield, which has been found for us already. */
    474  1.1  christos   if (keyfields->positional)
    475  1.1  christos     {
    476  1.1  christos       if (line1->text - text_base > line2->text - text_base)
    477  1.1  christos         tem = 1;
    478  1.1  christos       else
    479  1.1  christos         tem = -1;
    480  1.1  christos     }
    481  1.1  christos   else if (keyfields->numeric)
    482  1.1  christos     tem = line1->key.number - line2->key.number;
    483  1.1  christos   else
    484  1.1  christos     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
    485  1.1  christos                          line2->key.text, line2->keylen, 0);
    486  1.1  christos   if (tem)
    487  1.1  christos     {
    488  1.1  christos       if (keyfields->reverse)
    489  1.1  christos         return -tem;
    490  1.1  christos       return tem;
    491  1.1  christos     }
    492  1.1  christos 
    493  1.1  christos   text1 = line1->text;
    494  1.1  christos   text2 = line2->text;
    495  1.1  christos 
    496  1.1  christos   /* Compare using the second keyfield;
    497  1.1  christos      if that does not distinguish the lines, try the third keyfield;
    498  1.1  christos      and so on. */
    499  1.1  christos 
    500  1.1  christos   for (i = 1; i < num_keyfields; i++)
    501  1.1  christos     {
    502  1.1  christos       long length1, length2;
    503  1.1  christos       char *start1 = find_field (&keyfields[i], text1, &length1);
    504  1.1  christos       char *start2 = find_field (&keyfields[i], text2, &length2);
    505  1.1  christos       int tem = compare_field (&keyfields[i], start1, length1,
    506  1.1  christos                                text1 - text_base,
    507  1.1  christos                                start2, length2, text2 - text_base);
    508  1.1  christos       if (tem)
    509  1.1  christos         {
    510  1.1  christos           if (keyfields[i].reverse)
    511  1.1  christos             return -tem;
    512  1.1  christos           return tem;
    513  1.1  christos         }
    514  1.1  christos     }
    515  1.1  christos 
    516  1.1  christos   return 0;                     /* Lines match exactly. */
    517  1.1  christos }
    518  1.1  christos 
    519  1.1  christos /* Like compare_full but more general.
    520  1.1  christos    You can pass any strings, and you can say how many keyfields to use.
    521  1.1  christos    POS1 and POS2 should indicate the nominal positional ordering of
    522  1.1  christos    the two lines in the input.  */
    523  1.1  christos 
    524  1.1  christos int
    525  1.1  christos compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
    526  1.1  christos {
    527  1.1  christos   int i;
    528  1.1  christos 
    529  1.1  christos   /* Compare using the first keyfield;
    530  1.1  christos      if that does not distinguish the lines, try the second keyfield;
    531  1.1  christos      and so on. */
    532  1.1  christos 
    533  1.1  christos   for (i = 0; i < use_keyfields; i++)
    534  1.1  christos     {
    535  1.1  christos       long length1, length2;
    536  1.1  christos       char *start1 = find_field (&keyfields[i], str1, &length1);
    537  1.1  christos       char *start2 = find_field (&keyfields[i], str2, &length2);
    538  1.1  christos       int tem = compare_field (&keyfields[i], start1, length1, pos1,
    539  1.1  christos                                start2, length2, pos2);
    540  1.1  christos       if (tem)
    541  1.1  christos         {
    542  1.1  christos           if (keyfields[i].reverse)
    543  1.1  christos             return -tem;
    544  1.1  christos           return tem;
    545  1.1  christos         }
    546  1.1  christos     }
    547  1.1  christos 
    548  1.1  christos   return 0;                     /* Lines match exactly. */
    549  1.1  christos }
    550  1.1  christos 
    551  1.1  christos /* Find the start and length of a field in STR according to KEYFIELD.
    552  1.1  christos    A pointer to the starting character is returned, and the length
    553  1.1  christos    is stored into the int that LENGTHPTR points to.  */
    554  1.1  christos 
    555  1.1  christos char *
    556  1.1  christos find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
    557  1.1  christos {
    558  1.1  christos   char *start;
    559  1.1  christos   char *end;
    560  1.1  christos   char *(*fun) ();
    561  1.1  christos 
    562  1.1  christos   if (keyfield->braced)
    563  1.1  christos     fun = find_braced_pos;
    564  1.1  christos   else
    565  1.1  christos     fun = find_pos;
    566  1.1  christos 
    567  1.1  christos   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
    568  1.1  christos                   keyfield->ignore_blanks);
    569  1.1  christos   if (keyfield->endwords < 0)
    570  1.1  christos     {
    571  1.1  christos       if (keyfield->braced)
    572  1.1  christos         end = find_braced_end (start);
    573  1.1  christos       else
    574  1.1  christos         {
    575  1.1  christos           end = start;
    576  1.1  christos           while (*end && *end != '\n')
    577  1.1  christos             end++;
    578  1.1  christos         }
    579  1.1  christos     }
    580  1.1  christos   else
    581  1.1  christos     {
    582  1.1  christos       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
    583  1.1  christos       if (end - str < start - str)
    584  1.1  christos         end = start;
    585  1.1  christos     }
    586  1.1  christos   *lengthptr = end - start;
    587  1.1  christos   return start;
    588  1.1  christos }
    589  1.1  christos 
    590  1.1  christos /* Return a pointer to a specified place within STR,
    591  1.1  christos    skipping (from the beginning) WORDS words and then CHARS chars.
    592  1.1  christos    If IGNORE_BLANKS is nonzero, we skip all blanks
    593  1.1  christos    after finding the specified word.  */
    594  1.1  christos 
    595  1.1  christos char *
    596  1.1  christos find_pos (char *str, int words, int chars, int ignore_blanks)
    597  1.1  christos {
    598  1.1  christos   int i;
    599  1.1  christos   char *p = str;
    600  1.1  christos 
    601  1.1  christos   for (i = 0; i < words; i++)
    602  1.1  christos     {
    603  1.1  christos       char c;
    604  1.1  christos       /* Find next bunch of nonblanks and skip them. */
    605  1.1  christos       while ((c = *p) == ' ' || c == '\t')
    606  1.1  christos         p++;
    607  1.1  christos       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
    608  1.1  christos         p++;
    609  1.1  christos       if (!*p || *p == '\n')
    610  1.1  christos         return p;
    611  1.1  christos     }
    612  1.1  christos 
    613  1.1  christos   while (*p == ' ' || *p == '\t')
    614  1.1  christos     p++;
    615  1.1  christos 
    616  1.1  christos   for (i = 0; i < chars; i++)
    617  1.1  christos     {
    618  1.1  christos       if (!*p || *p == '\n')
    619  1.1  christos         break;
    620  1.1  christos       p++;
    621  1.1  christos     }
    622  1.1  christos   return p;
    623  1.1  christos }
    624  1.1  christos 
    625  1.1  christos /* Like find_pos but assumes that each field is surrounded by braces
    626  1.1  christos    and that braces within fields are balanced. */
    627  1.1  christos 
    628  1.1  christos char *
    629  1.1  christos find_braced_pos (char *str, int words, int chars, int ignore_blanks)
    630  1.1  christos {
    631  1.1  christos   int i;
    632  1.1  christos   int bracelevel;
    633  1.1  christos   char *p = str;
    634  1.1  christos   char c;
    635  1.1  christos 
    636  1.1  christos   for (i = 0; i < words; i++)
    637  1.1  christos     {
    638  1.1  christos       bracelevel = 1;
    639  1.1  christos       while ((c = *p++) != '{' && c != '\n' && c)
    640  1.1  christos         /* Do nothing. */ ;
    641  1.1  christos       if (c != '{')
    642  1.1  christos         return p - 1;
    643  1.1  christos       while (bracelevel)
    644  1.1  christos         {
    645  1.1  christos           c = *p++;
    646  1.1  christos           if (c == '{')
    647  1.1  christos             bracelevel++;
    648  1.1  christos           if (c == '}')
    649  1.1  christos             bracelevel--;
    650  1.1  christos           if (c == 0 || c == '\n')
    651  1.1  christos             return p - 1;
    652  1.1  christos         }
    653  1.1  christos     }
    654  1.1  christos 
    655  1.1  christos   while ((c = *p++) != '{' && c != '\n' && c)
    656  1.1  christos     /* Do nothing. */ ;
    657  1.1  christos 
    658  1.1  christos   if (c != '{')
    659  1.1  christos     return p - 1;
    660  1.1  christos 
    661  1.1  christos   if (ignore_blanks)
    662  1.1  christos     while ((c = *p) == ' ' || c == '\t')
    663  1.1  christos       p++;
    664  1.1  christos 
    665  1.1  christos   for (i = 0; i < chars; i++)
    666  1.1  christos     {
    667  1.1  christos       if (!*p || *p == '\n')
    668  1.1  christos         break;
    669  1.1  christos       p++;
    670  1.1  christos     }
    671  1.1  christos   return p;
    672  1.1  christos }
    673  1.1  christos 
    674  1.1  christos /* Find the end of the balanced-brace field which starts at STR.
    675  1.1  christos    The position returned is just before the closing brace. */
    676  1.1  christos 
    677  1.1  christos char *
    678  1.1  christos find_braced_end (char *str)
    679  1.1  christos {
    680  1.1  christos   int bracelevel;
    681  1.1  christos   char *p = str;
    682  1.1  christos   char c;
    683  1.1  christos 
    684  1.1  christos   bracelevel = 1;
    685  1.1  christos   while (bracelevel)
    686  1.1  christos     {
    687  1.1  christos       c = *p++;
    688  1.1  christos       if (c == '{')
    689  1.1  christos         bracelevel++;
    690  1.1  christos       if (c == '}')
    691  1.1  christos         bracelevel--;
    692  1.1  christos       if (c == 0 || c == '\n')
    693  1.1  christos         return p - 1;
    694  1.1  christos     }
    695  1.1  christos   return p - 1;
    696  1.1  christos }
    697  1.1  christos 
    698  1.1  christos long
    699  1.1  christos find_value (char *start, long int length)
    700  1.1  christos {
    701  1.1  christos   while (length != 0L)
    702  1.1  christos     {
    703  1.1  christos       if (isdigit (*start))
    704  1.1  christos         return atol (start);
    705  1.1  christos       length--;
    706  1.1  christos       start++;
    707  1.1  christos     }
    708  1.1  christos   return 0l;
    709  1.1  christos }
    710  1.1  christos 
    711  1.1  christos /* Vector used to translate characters for comparison.
    712  1.1  christos    This is how we make all alphanumerics follow all else,
    713  1.1  christos    and ignore case in the first sorting.  */
    714  1.1  christos int char_order[256];
    715  1.1  christos 
    716  1.1  christos void
    717  1.1  christos init_char_order (void)
    718  1.1  christos {
    719  1.1  christos   int i;
    720  1.1  christos   for (i = 1; i < 256; i++)
    721  1.1  christos     char_order[i] = i;
    722  1.1  christos 
    723  1.1  christos   for (i = '0'; i <= '9'; i++)
    724  1.1  christos     char_order[i] += 512;
    725  1.1  christos 
    726  1.1  christos   for (i = 'a'; i <= 'z'; i++)
    727  1.1  christos     {
    728  1.1  christos       char_order[i] = 512 + i;
    729  1.1  christos       char_order[i + 'A' - 'a'] = 512 + i;
    730  1.1  christos     }
    731  1.1  christos }
    732  1.1  christos 
    733  1.1  christos /* Compare two fields (each specified as a start pointer and a character count)
    734  1.1  christos    according to KEYFIELD.
    735  1.1  christos    The sign of the value reports the relation between the fields. */
    736  1.1  christos 
    737  1.1  christos int
    738  1.1  christos compare_field (struct keyfield *keyfield, char *start1, long int length1,
    739  1.1  christos                long int pos1, char *start2, long int length2, long int pos2)
    740  1.1  christos {
    741  1.1  christos   if (keyfields->positional)
    742  1.1  christos     {
    743  1.1  christos       if (pos1 > pos2)
    744  1.1  christos         return 1;
    745  1.1  christos       else
    746  1.1  christos         return -1;
    747  1.1  christos     }
    748  1.1  christos   if (keyfield->numeric)
    749  1.1  christos     {
    750  1.1  christos       long value = find_value (start1, length1) - find_value (start2, length2);
    751  1.1  christos       if (value > 0)
    752  1.1  christos         return 1;
    753  1.1  christos       if (value < 0)
    754  1.1  christos         return -1;
    755  1.1  christos       return 0;
    756  1.1  christos     }
    757  1.1  christos   else
    758  1.1  christos     {
    759  1.1  christos       char *p1 = start1;
    760  1.1  christos       char *p2 = start2;
    761  1.1  christos       char *e1 = start1 + length1;
    762  1.1  christos       char *e2 = start2 + length2;
    763  1.1  christos 
    764  1.1  christos       while (1)
    765  1.1  christos         {
    766  1.1  christos           int c1, c2;
    767  1.1  christos 
    768  1.1  christos           if (p1 == e1)
    769  1.1  christos             c1 = 0;
    770  1.1  christos           else
    771  1.1  christos             c1 = *p1++;
    772  1.1  christos           if (p2 == e2)
    773  1.1  christos             c2 = 0;
    774  1.1  christos           else
    775  1.1  christos             c2 = *p2++;
    776  1.1  christos 
    777  1.1  christos           if (char_order[c1] != char_order[c2])
    778  1.1  christos             return char_order[c1] - char_order[c2];
    779  1.1  christos           if (!c1)
    780  1.1  christos             break;
    781  1.1  christos         }
    782  1.1  christos 
    783  1.1  christos       /* Strings are equal except possibly for case.  */
    784  1.1  christos       p1 = start1;
    785  1.1  christos       p2 = start2;
    786  1.1  christos       while (1)
    787  1.1  christos         {
    788  1.1  christos           int c1, c2;
    789  1.1  christos 
    790  1.1  christos           if (p1 == e1)
    791  1.1  christos             c1 = 0;
    792  1.1  christos           else
    793  1.1  christos             c1 = *p1++;
    794  1.1  christos           if (p2 == e2)
    795  1.1  christos             c2 = 0;
    796  1.1  christos           else
    797  1.1  christos             c2 = *p2++;
    798  1.1  christos 
    799  1.1  christos           if (c1 != c2)
    800  1.1  christos             /* Reverse sign here so upper case comes out last.  */
    801  1.1  christos             return c2 - c1;
    802  1.1  christos           if (!c1)
    803  1.1  christos             break;
    804  1.1  christos         }
    805  1.1  christos 
    806  1.1  christos       return 0;
    807  1.1  christos     }
    808  1.1  christos }
    809  1.1  christos 
    810  1.1  christos /* A `struct linebuffer' is a structure which holds a line of text.
    812  1.1  christos    `readline' reads a line from a stream into a linebuffer
    813  1.1  christos    and works regardless of the length of the line.  */
    814  1.1  christos 
    815  1.1  christos struct linebuffer
    816  1.1  christos {
    817  1.1  christos   long size;
    818  1.1  christos   char *buffer;
    819  1.1  christos };
    820  1.1  christos 
    821  1.1  christos /* Initialize LINEBUFFER for use. */
    822  1.1  christos 
    823  1.1  christos void
    824  1.1  christos initbuffer (struct linebuffer *linebuffer)
    825  1.1  christos {
    826  1.1  christos   linebuffer->size = 200;
    827  1.1  christos   linebuffer->buffer = (char *) xmalloc (200);
    828  1.1  christos }
    829  1.1  christos 
    830  1.1  christos /* Read a line of text from STREAM into LINEBUFFER.
    831  1.1  christos    Return the length of the line.  */
    832  1.1  christos 
    833  1.1  christos long
    834  1.1  christos readline (struct linebuffer *linebuffer, FILE *stream)
    835  1.1  christos {
    836  1.1  christos   char *buffer = linebuffer->buffer;
    837  1.1  christos   char *p = linebuffer->buffer;
    838  1.1  christos   char *end = p + linebuffer->size;
    839  1.1  christos 
    840  1.1  christos   while (1)
    841  1.1  christos     {
    842  1.1  christos       int c = getc (stream);
    843  1.1  christos       if (p == end)
    844  1.1  christos         {
    845  1.1  christos           buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
    846  1.1  christos           p += buffer - linebuffer->buffer;
    847  1.1  christos           end += buffer - linebuffer->buffer;
    848  1.1  christos           linebuffer->buffer = buffer;
    849  1.1  christos         }
    850  1.1  christos       if (c < 0 || c == '\n')
    851  1.1  christos         {
    852  1.1  christos           *p = 0;
    853  1.1  christos           break;
    854  1.1  christos         }
    855  1.1  christos       *p++ = c;
    856  1.1  christos     }
    857  1.1  christos 
    858  1.1  christos   return p - buffer;
    859  1.1  christos }
    860  1.1  christos 
    861  1.1  christos /* Sort an input file too big to sort in core.  */
    863  1.1  christos 
    864  1.1  christos void
    865  1.1  christos sort_offline (char *infile, off_t total, char *outfile)
    866  1.1  christos {
    867  1.1  christos   /* More than enough. */
    868  1.1  christos   int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
    869  1.1  christos   char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
    870  1.1  christos   FILE *istream = fopen (infile, "r");
    871  1.1  christos   int i;
    872  1.1  christos   struct linebuffer lb;
    873  1.1  christos   long linelength;
    874  1.1  christos   int failure = 0;
    875  1.1  christos 
    876  1.1  christos   initbuffer (&lb);
    877  1.1  christos 
    878  1.1  christos   /* Read in one line of input data.  */
    879  1.1  christos 
    880  1.1  christos   linelength = readline (&lb, istream);
    881  1.1  christos 
    882  1.1  christos   if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
    883  1.1  christos     {
    884  1.1  christos       error (_("%s: not a texinfo index file"), infile);
    885  1.1  christos       return;
    886  1.1  christos     }
    887  1.1  christos 
    888  1.1  christos   /* Split up the input into `ntemps' temporary files, or maybe fewer,
    889  1.1  christos      and put the new files' names into `tempfiles' */
    890  1.1  christos 
    891  1.1  christos   for (i = 0; i < ntemps; i++)
    892  1.1  christos     {
    893  1.1  christos       char *outname = maketempname (++tempcount);
    894  1.1  christos       FILE *ostream = fopen (outname, "w");
    895  1.1  christos       long tempsize = 0;
    896  1.1  christos 
    897  1.1  christos       if (!ostream)
    898  1.1  christos         pfatal_with_name (outname);
    899  1.1  christos       tempfiles[i] = outname;
    900  1.1  christos 
    901  1.1  christos       /* Copy lines into this temp file as long as it does not make file
    902  1.1  christos          "too big" or until there are no more lines.  */
    903  1.1  christos 
    904  1.1  christos       while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
    905  1.1  christos         {
    906  1.1  christos           tempsize += linelength + 1;
    907  1.1  christos           fputs (lb.buffer, ostream);
    908  1.1  christos           putc ('\n', ostream);
    909  1.1  christos 
    910  1.1  christos           /* Read another line of input data.  */
    911  1.1  christos 
    912  1.1  christos           linelength = readline (&lb, istream);
    913  1.1  christos           if (!linelength && feof (istream))
    914  1.1  christos             break;
    915  1.1  christos 
    916  1.1  christos           if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
    917  1.1  christos             {
    918  1.1  christos               error (_("%s: not a texinfo index file"), infile);
    919  1.1  christos               failure = 1;
    920  1.1  christos               goto fail;
    921  1.1  christos             }
    922  1.1  christos         }
    923  1.1  christos       fclose (ostream);
    924  1.1  christos       if (feof (istream))
    925  1.1  christos         break;
    926  1.1  christos     }
    927  1.1  christos 
    928  1.1  christos   free (lb.buffer);
    929  1.1  christos 
    930  1.1  christos fail:
    931  1.1  christos   /* Record number of temp files we actually needed.  */
    932  1.1  christos 
    933  1.1  christos   ntemps = i;
    934  1.1  christos 
    935  1.1  christos   /* Sort each tempfile into another tempfile.
    936  1.1  christos     Delete the first set of tempfiles and put the names of the second
    937  1.1  christos     into `tempfiles'. */
    938  1.1  christos 
    939  1.1  christos   for (i = 0; i < ntemps; i++)
    940  1.1  christos     {
    941  1.1  christos       char *newtemp = maketempname (++tempcount);
    942  1.1  christos       sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp);
    943  1.1  christos       if (!keep_tempfiles)
    944  1.1  christos         unlink (tempfiles[i]);
    945  1.1  christos       tempfiles[i] = newtemp;
    946  1.1  christos     }
    947  1.1  christos 
    948  1.1  christos   if (failure)
    949  1.1  christos     return;
    950  1.1  christos 
    951  1.1  christos   /* Merge the tempfiles together and indexify. */
    952  1.1  christos 
    953  1.1  christos   merge_files (tempfiles, ntemps, outfile);
    954  1.1  christos }
    955  1.1  christos 
    956  1.1  christos /* Sort INFILE, whose size is TOTAL,
    958  1.1  christos    assuming that is small enough to be done in-core,
    959  1.1  christos    then indexify it and send the output to OUTFILE (or to stdout).  */
    960  1.1  christos 
    961  1.1  christos void
    962  1.1  christos sort_in_core (char *infile, int total, char *outfile)
    963  1.1  christos {
    964  1.1  christos   char **nextline;
    965  1.1  christos   char *data = (char *) xmalloc (total + 1);
    966  1.1  christos   char *file_data;
    967  1.1  christos   long file_size;
    968  1.1  christos   int i;
    969  1.1  christos   FILE *ostream = stdout;
    970  1.1  christos   struct lineinfo *lineinfo;
    971  1.1  christos 
    972  1.1  christos   /* Read the contents of the file into the moby array `data'. */
    973  1.1  christos 
    974  1.1  christos   int desc = open (infile, O_RDONLY, 0);
    975  1.1  christos 
    976  1.1  christos   if (desc < 0)
    977  1.1  christos     fatal (_("failure reopening %s"), infile);
    978  1.1  christos   for (file_size = 0;;)
    979  1.1  christos     {
    980  1.1  christos       i = read (desc, data + file_size, total - file_size);
    981  1.1  christos       if (i <= 0)
    982  1.1  christos         break;
    983  1.1  christos       file_size += i;
    984  1.1  christos     }
    985  1.1  christos   file_data = data;
    986  1.1  christos   data[file_size] = 0;
    987  1.1  christos 
    988  1.1  christos   close (desc);
    989  1.1  christos 
    990  1.1  christos   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
    991  1.1  christos     {
    992  1.1  christos       error (_("%s: not a texinfo index file"), infile);
    993  1.1  christos       return;
    994  1.1  christos     }
    995  1.1  christos 
    996  1.1  christos   init_char_order ();
    997  1.1  christos 
    998  1.1  christos   /* Sort routines want to know this address. */
    999  1.1  christos 
   1000  1.1  christos   text_base = data;
   1001  1.1  christos 
   1002  1.1  christos   /* Create the array of pointers to lines, with a default size
   1003  1.1  christos      frequently enough.  */
   1004  1.1  christos 
   1005  1.1  christos   nlines = total / 50;
   1006  1.1  christos   if (!nlines)
   1007  1.1  christos     nlines = 2;
   1008  1.1  christos   linearray = (char **) xmalloc (nlines * sizeof (char *));
   1009  1.1  christos 
   1010  1.1  christos   /* `nextline' points to the next free slot in this array.
   1011  1.1  christos      `nlines' is the allocated size.  */
   1012  1.1  christos 
   1013  1.1  christos   nextline = linearray;
   1014  1.1  christos 
   1015  1.1  christos   /* Parse the input file's data, and make entries for the lines.  */
   1016  1.1  christos 
   1017  1.1  christos   nextline = parsefile (infile, nextline, file_data, file_size);
   1018  1.1  christos   if (nextline == 0)
   1019  1.1  christos     {
   1020  1.1  christos       error (_("%s: not a texinfo index file"), infile);
   1021  1.1  christos       return;
   1022  1.1  christos     }
   1023  1.1  christos 
   1024  1.1  christos   /* Sort the lines. */
   1025  1.1  christos 
   1026  1.1  christos   /* If we have enough space, find the first keyfield of each line in advance.
   1027  1.1  christos      Make a `struct lineinfo' for each line, which records the keyfield
   1028  1.1  christos      as well as the line, and sort them.  */
   1029  1.1  christos 
   1030  1.1  christos   lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
   1031  1.1  christos 
   1032  1.1  christos   if (lineinfo)
   1033  1.1  christos     {
   1034  1.1  christos       struct lineinfo *lp;
   1035  1.1  christos       char **p;
   1036  1.1  christos 
   1037  1.1  christos       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
   1038  1.1  christos         {
   1039  1.1  christos           lp->text = *p;
   1040  1.1  christos           lp->key.text = find_field (keyfields, *p, &lp->keylen);
   1041  1.1  christos           if (keyfields->numeric)
   1042  1.1  christos             lp->key.number = find_value (lp->key.text, lp->keylen);
   1043  1.1  christos         }
   1044  1.1  christos 
   1045  1.1  christos       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
   1046  1.1  christos              compare_prepared);
   1047  1.1  christos 
   1048  1.1  christos       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
   1049  1.1  christos         *p = lp->text;
   1050  1.1  christos 
   1051  1.1  christos       free (lineinfo);
   1052  1.1  christos     }
   1053  1.1  christos   else
   1054  1.1  christos     qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
   1055  1.1  christos 
   1056  1.1  christos   /* Open the output file. */
   1057  1.1  christos 
   1058  1.1  christos   if (outfile)
   1059  1.1  christos     {
   1060  1.1  christos       ostream = fopen (outfile, "w");
   1061  1.1  christos       if (!ostream)
   1062  1.1  christos         pfatal_with_name (outfile);
   1063  1.1  christos     }
   1064  1.1  christos 
   1065  1.1  christos   writelines (linearray, nextline - linearray, ostream);
   1066  1.1  christos   if (outfile)
   1067  1.1  christos     fclose (ostream);
   1068  1.1  christos 
   1069  1.1  christos   free (linearray);
   1070  1.1  christos   free (data);
   1071  1.1  christos }
   1072  1.1  christos 
   1073  1.1  christos /* Parse an input string in core into lines.
   1075  1.1  christos    DATA is the input string, and SIZE is its length.
   1076  1.1  christos    Data goes in LINEARRAY starting at NEXTLINE.
   1077  1.1  christos    The value returned is the first entry in LINEARRAY still unused.
   1078  1.1  christos    Value 0 means input file contents are invalid.  */
   1079  1.1  christos 
   1080  1.1  christos char **
   1081  1.1  christos parsefile (char *filename, char **nextline, char *data, long int size)
   1082  1.1  christos {
   1083  1.1  christos   char *p, *end;
   1084  1.1  christos   char **line = nextline;
   1085  1.1  christos 
   1086  1.1  christos   p = data;
   1087  1.1  christos   end = p + size;
   1088  1.1  christos   *end = 0;
   1089  1.1  christos 
   1090  1.1  christos   while (p != end)
   1091  1.1  christos     {
   1092  1.1  christos       if (p[0] != '\\' && p[0] != '@')
   1093  1.1  christos         return 0;
   1094  1.1  christos 
   1095  1.1  christos       *line = p;
   1096  1.1  christos 
   1097  1.1  christos       /* Find the first letter of the first field of this line.  If it
   1098  1.1  christos          is different from the first letter of the first field of the
   1099  1.1  christos          first line, we need initial headers in the output index.  */
   1100  1.1  christos       while (*p && *p != '{')
   1101  1.1  christos         p++;
   1102  1.1  christos       if (p == end)
   1103  1.1  christos         return 0;
   1104  1.1  christos       p++;
   1105  1.1  christos       if (first_initial)
   1106  1.1  christos         {
   1107  1.1  christos           if (first_initial != toupper (*p))
   1108  1.1  christos             need_initials = 1;
   1109  1.1  christos         }
   1110  1.1  christos       else
   1111  1.1  christos         first_initial = toupper (*p);
   1112  1.1  christos 
   1113  1.1  christos       while (*p && *p != '\n')
   1114  1.1  christos         p++;
   1115  1.1  christos       if (p != end)
   1116  1.1  christos         p++;
   1117  1.1  christos 
   1118  1.1  christos       line++;
   1119  1.1  christos       if (line == linearray + nlines)
   1120  1.1  christos         {
   1121  1.1  christos           char **old = linearray;
   1122  1.1  christos           linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
   1123  1.1  christos           line += linearray - old;
   1124  1.1  christos         }
   1125  1.1  christos     }
   1126  1.1  christos 
   1127  1.1  christos   return line;
   1128  1.1  christos }
   1129  1.1  christos 
   1130  1.1  christos /* Indexification is a filter applied to the sorted lines
   1132  1.1  christos    as they are being written to the output file.
   1133  1.1  christos    Multiple entries for the same name, with different page numbers,
   1134  1.1  christos    get combined into a single entry with multiple page numbers.
   1135  1.1  christos    The first braced field, which is used for sorting, is discarded.
   1136  1.1  christos    However, its first character is examined, folded to lower case,
   1137  1.1  christos    and if it is different from that in the previous line fed to us
   1138  1.1  christos    a \initial line is written with one argument, the new initial.
   1139  1.1  christos 
   1140  1.1  christos    If an entry has four braced fields, then the second and third
   1141  1.1  christos    constitute primary and secondary names.
   1142  1.1  christos    In this case, each change of primary name
   1143  1.1  christos    generates a \primary line which contains only the primary name,
   1144  1.1  christos    and in between these are \secondary lines which contain
   1145  1.1  christos    just a secondary name and page numbers. */
   1146  1.1  christos 
   1147  1.1  christos /* The last primary name we wrote a \primary entry for.
   1148  1.1  christos    If only one level of indexing is being done, this is the last name seen. */
   1149  1.1  christos char *lastprimary;
   1150  1.1  christos /* Length of storage allocated for lastprimary. */
   1151  1.1  christos int lastprimarylength;
   1152  1.1  christos 
   1153  1.1  christos /* Similar, for the secondary name. */
   1154  1.1  christos char *lastsecondary;
   1155  1.1  christos int lastsecondarylength;
   1156  1.1  christos 
   1157  1.1  christos /* Zero if we are not in the middle of writing an entry.
   1158  1.1  christos    One if we have written the beginning of an entry but have not
   1159  1.1  christos    yet written any page numbers into it.
   1160  1.1  christos    Greater than one if we have written the beginning of an entry
   1161  1.1  christos    plus at least one page number. */
   1162  1.1  christos int pending;
   1163  1.1  christos 
   1164  1.1  christos /* The initial (for sorting purposes) of the last primary entry written.
   1165  1.1  christos    When this changes, a \initial {c} line is written */
   1166  1.1  christos 
   1167  1.1  christos char *lastinitial;
   1168  1.1  christos 
   1169  1.1  christos int lastinitiallength;
   1170  1.1  christos 
   1171  1.1  christos /* When we need a string of length 1 for the value of lastinitial,
   1172  1.1  christos    store it here.  */
   1173  1.1  christos 
   1174  1.1  christos char lastinitial1[2];
   1175  1.1  christos 
   1176  1.1  christos /* Initialize static storage for writing an index. */
   1177  1.1  christos 
   1178  1.1  christos void
   1179  1.1  christos init_index (void)
   1180  1.1  christos {
   1181  1.1  christos   pending = 0;
   1182  1.1  christos   lastinitial = lastinitial1;
   1183  1.1  christos   lastinitial1[0] = 0;
   1184  1.1  christos   lastinitial1[1] = 0;
   1185  1.1  christos   lastinitiallength = 0;
   1186  1.1  christos   lastprimarylength = 100;
   1187  1.1  christos   lastprimary = (char *) xmalloc (lastprimarylength + 1);
   1188  1.1  christos   memset (lastprimary, '\0', lastprimarylength + 1);
   1189  1.1  christos   lastsecondarylength = 100;
   1190  1.1  christos   lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
   1191  1.1  christos   memset (lastsecondary, '\0', lastsecondarylength + 1);
   1192  1.1  christos }
   1193  1.1  christos 
   1194  1.1  christos /* Indexify.  Merge entries for the same name,
   1195  1.1  christos    insert headers for each initial character, etc.  */
   1196  1.1  christos 
   1197  1.1  christos void
   1198  1.1  christos indexify (char *line, FILE *ostream)
   1199  1.1  christos {
   1200  1.1  christos   char *primary, *secondary, *pagenumber;
   1201  1.1  christos   int primarylength, secondarylength = 0, pagelength;
   1202  1.1  christos   int nosecondary;
   1203  1.1  christos   int initiallength;
   1204  1.1  christos   char *initial;
   1205  1.1  christos   char initial1[2];
   1206  1.1  christos   register char *p;
   1207  1.1  christos 
   1208  1.1  christos   /* First, analyze the parts of the entry fed to us this time. */
   1209  1.1  christos 
   1210  1.1  christos   p = find_braced_pos (line, 0, 0, 0);
   1211  1.1  christos   if (*p == '{')
   1212  1.1  christos     {
   1213  1.1  christos       initial = p;
   1214  1.1  christos       /* Get length of inner pair of braces starting at `p',
   1215  1.1  christos          including that inner pair of braces.  */
   1216  1.1  christos       initiallength = find_braced_end (p + 1) + 1 - p;
   1217  1.1  christos     }
   1218  1.1  christos   else
   1219  1.1  christos     {
   1220  1.1  christos       initial = initial1;
   1221  1.1  christos       initial1[0] = toupper (*p);
   1222  1.1  christos       initial1[1] = 0;
   1223  1.1  christos       initiallength = 1;
   1224  1.1  christos     }
   1225  1.1  christos 
   1226  1.1  christos   pagenumber = find_braced_pos (line, 1, 0, 0);
   1227  1.1  christos   pagelength = find_braced_end (pagenumber) - pagenumber;
   1228  1.1  christos   if (pagelength == 0)
   1229  1.1  christos     fatal (_("No page number in %s"), line);
   1230  1.1  christos 
   1231  1.1  christos   primary = find_braced_pos (line, 2, 0, 0);
   1232  1.1  christos   primarylength = find_braced_end (primary) - primary;
   1233  1.1  christos 
   1234  1.1  christos   secondary = find_braced_pos (line, 3, 0, 0);
   1235  1.1  christos   nosecondary = !*secondary;
   1236  1.1  christos   if (!nosecondary)
   1237  1.1  christos     secondarylength = find_braced_end (secondary) - secondary;
   1238  1.1  christos 
   1239  1.1  christos   /* If the primary is different from before, make a new primary entry. */
   1240  1.1  christos   if (strncmp (primary, lastprimary, primarylength))
   1241  1.1  christos     {
   1242  1.1  christos       /* Close off current secondary entry first, if one is open. */
   1243  1.1  christos       if (pending)
   1244  1.1  christos         {
   1245  1.1  christos           fputs ("}\n", ostream);
   1246  1.1  christos           pending = 0;
   1247  1.1  christos         }
   1248  1.1  christos 
   1249  1.1  christos       /* If this primary has a different initial, include an entry for
   1250  1.1  christos          the initial. */
   1251  1.1  christos       if (need_initials &&
   1252  1.1  christos           (initiallength != lastinitiallength ||
   1253  1.1  christos            strncmp (initial, lastinitial, initiallength)))
   1254  1.1  christos         {
   1255  1.1  christos           fprintf (ostream, "\\initial {");
   1256  1.1  christos           fwrite (initial, 1, initiallength, ostream);
   1257  1.1  christos           fputs ("}\n", ostream);
   1258  1.1  christos           if (initial == initial1)
   1259  1.1  christos             {
   1260  1.1  christos               lastinitial = lastinitial1;
   1261  1.1  christos               *lastinitial1 = *initial1;
   1262  1.1  christos             }
   1263  1.1  christos           else
   1264  1.1  christos             {
   1265  1.1  christos               lastinitial = initial;
   1266  1.1  christos             }
   1267  1.1  christos           lastinitiallength = initiallength;
   1268  1.1  christos         }
   1269  1.1  christos 
   1270  1.1  christos       /* Make the entry for the primary.  */
   1271  1.1  christos       if (nosecondary)
   1272  1.1  christos         fputs ("\\entry {", ostream);
   1273  1.1  christos       else
   1274  1.1  christos         fputs ("\\primary {", ostream);
   1275  1.1  christos       fwrite (primary, primarylength, 1, ostream);
   1276  1.1  christos       if (nosecondary)
   1277  1.1  christos         {
   1278  1.1  christos           fputs ("}{", ostream);
   1279  1.1  christos           pending = 1;
   1280  1.1  christos         }
   1281  1.1  christos       else
   1282  1.1  christos         fputs ("}\n", ostream);
   1283  1.1  christos 
   1284  1.1  christos       /* Record name of most recent primary. */
   1285  1.1  christos       if (lastprimarylength < primarylength)
   1286  1.1  christos         {
   1287  1.1  christos           lastprimarylength = primarylength + 100;
   1288  1.1  christos           lastprimary = (char *) xrealloc (lastprimary,
   1289  1.1  christos                                            1 + lastprimarylength);
   1290  1.1  christos         }
   1291  1.1  christos       strncpy (lastprimary, primary, primarylength);
   1292  1.1  christos       lastprimary[primarylength] = 0;
   1293  1.1  christos 
   1294  1.1  christos       /* There is no current secondary within this primary, now. */
   1295  1.1  christos       lastsecondary[0] = 0;
   1296  1.1  christos     }
   1297  1.1  christos 
   1298  1.1  christos   /* Should not have an entry with no subtopic following one with a
   1299  1.1  christos      subtopic. */
   1300  1.1  christos 
   1301  1.1  christos   if (nosecondary && *lastsecondary)
   1302  1.1  christos     error (_("entry %s follows an entry with a secondary name"), line);
   1303  1.1  christos 
   1304  1.1  christos   /* Start a new secondary entry if necessary. */
   1305  1.1  christos   if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
   1306  1.1  christos     {
   1307  1.1  christos       if (pending)
   1308  1.1  christos         {
   1309  1.1  christos           fputs ("}\n", ostream);
   1310  1.1  christos           pending = 0;
   1311  1.1  christos         }
   1312  1.1  christos 
   1313  1.1  christos       /* Write the entry for the secondary.  */
   1314  1.1  christos       fputs ("\\secondary {", ostream);
   1315  1.1  christos       fwrite (secondary, secondarylength, 1, ostream);
   1316  1.1  christos       fputs ("}{", ostream);
   1317  1.1  christos       pending = 1;
   1318  1.1  christos 
   1319  1.1  christos       /* Record name of most recent secondary. */
   1320  1.1  christos       if (lastsecondarylength < secondarylength)
   1321  1.1  christos         {
   1322  1.1  christos           lastsecondarylength = secondarylength + 100;
   1323  1.1  christos           lastsecondary = (char *) xrealloc (lastsecondary,
   1324  1.1  christos                                              1 + lastsecondarylength);
   1325  1.1  christos         }
   1326  1.1  christos       strncpy (lastsecondary, secondary, secondarylength);
   1327  1.1  christos       lastsecondary[secondarylength] = 0;
   1328  1.1  christos     }
   1329  1.1  christos 
   1330  1.1  christos   /* Here to add one more page number to the current entry. */
   1331  1.1  christos   if (pending++ != 1)
   1332  1.1  christos     fputs (", ", ostream);  /* Punctuate first, if this is not the first. */
   1333  1.1  christos   fwrite (pagenumber, pagelength, 1, ostream);
   1334  1.1  christos }
   1335  1.1  christos 
   1336  1.1  christos /* Close out any unfinished output entry. */
   1337  1.1  christos 
   1338  1.1  christos void
   1339  1.1  christos finish_index (FILE *ostream)
   1340  1.1  christos {
   1341  1.1  christos   if (pending)
   1342  1.1  christos     fputs ("}\n", ostream);
   1343  1.1  christos   free (lastprimary);
   1344  1.1  christos   free (lastsecondary);
   1345  1.1  christos }
   1346  1.1  christos 
   1347  1.1  christos /* Copy the lines in the sorted order.
   1349  1.1  christos    Each line is copied out of the input file it was found in. */
   1350  1.1  christos 
   1351  1.1  christos void
   1352  1.1  christos writelines (char **linearray, int nlines, FILE *ostream)
   1353  1.1  christos {
   1354  1.1  christos   char **stop_line = linearray + nlines;
   1355  1.1  christos   char **next_line;
   1356  1.1  christos 
   1357  1.1  christos   init_index ();
   1358  1.1  christos 
   1359  1.1  christos   /* Output the text of the lines, and free the buffer space. */
   1360  1.1  christos 
   1361  1.1  christos   for (next_line = linearray; next_line != stop_line; next_line++)
   1362  1.1  christos     {
   1363  1.1  christos       /* If -u was specified, output the line only if distinct from
   1364  1.1  christos          previous one.  */
   1365  1.1  christos       if (next_line == linearray
   1366  1.1  christos       /* Compare previous line with this one, using only the
   1367  1.1  christos          explicitly specd keyfields. */
   1368  1.1  christos           || compare_general (*(next_line - 1), *next_line, 0L, 0L,
   1369  1.1  christos                               num_keyfields - 1))
   1370  1.1  christos         {
   1371  1.1  christos           char *p = *next_line;
   1372  1.1  christos           char c;
   1373  1.1  christos 
   1374  1.1  christos           while ((c = *p++) && c != '\n')
   1375  1.1  christos             /* Do nothing. */ ;
   1376  1.1  christos           *(p - 1) = 0;
   1377  1.1  christos           indexify (*next_line, ostream);
   1378  1.1  christos         }
   1379  1.1  christos     }
   1380  1.1  christos 
   1381  1.1  christos   finish_index (ostream);
   1382  1.1  christos }
   1383  1.1  christos 
   1384  1.1  christos /* Assume (and optionally verify) that each input file is sorted;
   1386  1.1  christos    merge them and output the result.
   1387  1.1  christos    Returns nonzero if any input file fails to be sorted.
   1388  1.1  christos 
   1389  1.1  christos    This is the high-level interface that can handle an unlimited
   1390  1.1  christos    number of files.  */
   1391  1.1  christos 
   1392  1.1  christos #define MAX_DIRECT_MERGE 10
   1393  1.1  christos 
   1394  1.1  christos int
   1395  1.1  christos merge_files (char **infiles, int nfiles, char *outfile)
   1396  1.1  christos {
   1397  1.1  christos   char **tempfiles;
   1398  1.1  christos   int ntemps;
   1399  1.1  christos   int i;
   1400  1.1  christos   int value = 0;
   1401  1.1  christos   int start_tempcount = tempcount;
   1402  1.1  christos 
   1403  1.1  christos   if (nfiles <= MAX_DIRECT_MERGE)
   1404  1.1  christos     return merge_direct (infiles, nfiles, outfile);
   1405  1.1  christos 
   1406  1.1  christos   /* Merge groups of MAX_DIRECT_MERGE input files at a time,
   1407  1.1  christos      making a temporary file to hold each group's result.  */
   1408  1.1  christos 
   1409  1.1  christos   ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
   1410  1.1  christos   tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
   1411  1.1  christos   for (i = 0; i < ntemps; i++)
   1412  1.1  christos     {
   1413  1.1  christos       int nf = MAX_DIRECT_MERGE;
   1414  1.1  christos       if (i + 1 == ntemps)
   1415  1.1  christos         nf = nfiles - i * MAX_DIRECT_MERGE;
   1416  1.1  christos       tempfiles[i] = maketempname (++tempcount);
   1417  1.1  christos       value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
   1418  1.1  christos     }
   1419  1.1  christos 
   1420  1.1  christos   /* All temporary files that existed before are no longer needed
   1421  1.1  christos      since their contents have been merged into our new tempfiles.
   1422  1.1  christos      So delete them.  */
   1423  1.1  christos   flush_tempfiles (start_tempcount);
   1424  1.1  christos 
   1425  1.1  christos   /* Now merge the temporary files we created.  */
   1426  1.1  christos 
   1427  1.1  christos   merge_files (tempfiles, ntemps, outfile);
   1428  1.1  christos 
   1429  1.1  christos   free (tempfiles);
   1430  1.1  christos 
   1431  1.1  christos   return value;
   1432  1.1  christos }
   1433  1.1  christos 
   1434  1.1  christos /* Assume (and optionally verify) that each input file is sorted;
   1436  1.1  christos    merge them and output the result.
   1437  1.1  christos    Returns nonzero if any input file fails to be sorted.
   1438  1.1  christos 
   1439  1.1  christos    This version of merging will not work if the number of
   1440  1.1  christos    input files gets too high.  Higher level functions
   1441  1.1  christos    use it only with a bounded number of input files.  */
   1442  1.1  christos 
   1443  1.1  christos int
   1444  1.1  christos merge_direct (char **infiles, int nfiles, char *outfile)
   1445  1.1  christos {
   1446  1.1  christos   struct linebuffer *lb1, *lb2;
   1447  1.1  christos   struct linebuffer **thisline, **prevline;
   1448  1.1  christos   FILE **streams;
   1449  1.1  christos   int i;
   1450  1.1  christos   int nleft;
   1451  1.1  christos   int lossage = 0;
   1452  1.1  christos   int *file_lossage;
   1453  1.1  christos   struct linebuffer *prev_out = 0;
   1454  1.1  christos   FILE *ostream = stdout;
   1455  1.1  christos 
   1456  1.1  christos   if (outfile)
   1457  1.1  christos     {
   1458  1.1  christos       ostream = fopen (outfile, "w");
   1459  1.1  christos     }
   1460  1.1  christos   if (!ostream)
   1461  1.1  christos     pfatal_with_name (outfile);
   1462  1.1  christos 
   1463  1.1  christos   init_index ();
   1464  1.1  christos 
   1465  1.1  christos   if (nfiles == 0)
   1466  1.1  christos     {
   1467  1.1  christos       if (outfile)
   1468  1.1  christos         fclose (ostream);
   1469  1.1  christos       return 0;
   1470  1.1  christos     }
   1471  1.1  christos 
   1472  1.1  christos   /* For each file, make two line buffers.  Also, for each file, there
   1473  1.1  christos      is an element of `thisline' which points at any time to one of the
   1474  1.1  christos      file's two buffers, and an element of `prevline' which points to
   1475  1.1  christos      the other buffer.  `thisline' is supposed to point to the next
   1476  1.1  christos      available line from the file, while `prevline' holds the last file
   1477  1.1  christos      line used, which is remembered so that we can verify that the file
   1478  1.1  christos      is properly sorted. */
   1479  1.1  christos 
   1480  1.1  christos   /* lb1 and lb2 contain one buffer each per file. */
   1481  1.1  christos   lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
   1482  1.1  christos   lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
   1483  1.1  christos 
   1484  1.1  christos   /* thisline[i] points to the linebuffer holding the next available
   1485  1.1  christos      line in file i, or is zero if there are no lines left in that file.  */
   1486  1.1  christos   thisline = (struct linebuffer **)
   1487  1.1  christos     xmalloc (nfiles * sizeof (struct linebuffer *));
   1488  1.1  christos   /* prevline[i] points to the linebuffer holding the last used line
   1489  1.1  christos      from file i.  This is just for verifying that file i is properly
   1490  1.1  christos      sorted.  */
   1491  1.1  christos   prevline = (struct linebuffer **)
   1492  1.1  christos     xmalloc (nfiles * sizeof (struct linebuffer *));
   1493  1.1  christos   /* streams[i] holds the input stream for file i.  */
   1494  1.1  christos   streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
   1495  1.1  christos   /* file_lossage[i] is nonzero if we already know file i is not
   1496  1.1  christos      properly sorted.  */
   1497  1.1  christos   file_lossage = (int *) xmalloc (nfiles * sizeof (int));
   1498  1.1  christos 
   1499  1.1  christos   /* Allocate and initialize all that storage. */
   1500  1.1  christos 
   1501  1.1  christos   for (i = 0; i < nfiles; i++)
   1502  1.1  christos     {
   1503  1.1  christos       initbuffer (&lb1[i]);
   1504  1.1  christos       initbuffer (&lb2[i]);
   1505  1.1  christos       thisline[i] = &lb1[i];
   1506  1.1  christos       prevline[i] = &lb2[i];
   1507  1.1  christos       file_lossage[i] = 0;
   1508  1.1  christos       streams[i] = fopen (infiles[i], "r");
   1509  1.1  christos       if (!streams[i])
   1510  1.1  christos         pfatal_with_name (infiles[i]);
   1511  1.1  christos 
   1512  1.1  christos       readline (thisline[i], streams[i]);
   1513  1.1  christos     }
   1514  1.1  christos 
   1515  1.1  christos   /* Keep count of number of files not at eof. */
   1516  1.1  christos   nleft = nfiles;
   1517  1.1  christos 
   1518  1.1  christos   while (nleft)
   1519  1.1  christos     {
   1520  1.1  christos       struct linebuffer *best = 0;
   1521  1.1  christos       struct linebuffer *exch;
   1522  1.1  christos       int bestfile = -1;
   1523  1.1  christos       int i;
   1524  1.1  christos 
   1525  1.1  christos       /* Look at the next avail line of each file; choose the least one.  */
   1526  1.1  christos 
   1527  1.1  christos       for (i = 0; i < nfiles; i++)
   1528  1.1  christos         {
   1529  1.1  christos           if (thisline[i] &&
   1530  1.1  christos               (!best ||
   1531  1.1  christos                0 < compare_general (best->buffer, thisline[i]->buffer,
   1532  1.1  christos                                  (long) bestfile, (long) i, num_keyfields)))
   1533  1.1  christos             {
   1534  1.1  christos               best = thisline[i];
   1535  1.1  christos               bestfile = i;
   1536  1.1  christos             }
   1537  1.1  christos         }
   1538  1.1  christos 
   1539  1.1  christos       /* Output that line, unless it matches the previous one and we
   1540  1.1  christos          don't want duplicates. */
   1541  1.1  christos 
   1542  1.1  christos       if (!(prev_out &&
   1543  1.1  christos             !compare_general (prev_out->buffer,
   1544  1.1  christos                               best->buffer, 0L, 1L, num_keyfields - 1)))
   1545  1.1  christos         indexify (best->buffer, ostream);
   1546  1.1  christos       prev_out = best;
   1547  1.1  christos 
   1548  1.1  christos       /* Now make the line the previous of its file, and fetch a new
   1549  1.1  christos          line from that file.  */
   1550  1.1  christos 
   1551  1.1  christos       exch = prevline[bestfile];
   1552  1.1  christos       prevline[bestfile] = thisline[bestfile];
   1553  1.1  christos       thisline[bestfile] = exch;
   1554  1.1  christos 
   1555  1.1  christos       while (1)
   1556  1.1  christos         {
   1557  1.1  christos           /* If the file has no more, mark it empty. */
   1558  1.1  christos 
   1559  1.1  christos           if (feof (streams[bestfile]))
   1560  1.1  christos             {
   1561  1.1  christos               thisline[bestfile] = 0;
   1562  1.1  christos               /* Update the number of files still not empty. */
   1563  1.1  christos               nleft--;
   1564  1.1  christos               break;
   1565  1.1  christos             }
   1566  1.1  christos           readline (thisline[bestfile], streams[bestfile]);
   1567  1.1  christos           if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
   1568  1.1  christos             break;
   1569  1.1  christos         }
   1570  1.1  christos     }
   1571  1.1  christos 
   1572  1.1  christos   finish_index (ostream);
   1573  1.1  christos 
   1574  1.1  christos   /* Free all storage and close all input streams. */
   1575  1.1  christos 
   1576  1.1  christos   for (i = 0; i < nfiles; i++)
   1577  1.1  christos     {
   1578  1.1  christos       fclose (streams[i]);
   1579  1.1  christos       free (lb1[i].buffer);
   1580  1.1  christos       free (lb2[i].buffer);
   1581  1.1  christos     }
   1582  1.1  christos   free (file_lossage);
   1583  1.1  christos   free (lb1);
   1584  1.1  christos   free (lb2);
   1585  1.1  christos   free (thisline);
   1586  1.1  christos   free (prevline);
   1587  1.1  christos   free (streams);
   1588  1.1  christos 
   1589  1.1  christos   if (outfile)
   1590  1.1  christos     fclose (ostream);
   1591  1.1  christos 
   1592  1.1  christos   return lossage;
   1593  1.1  christos }
   1594  1.1  christos 
   1595  1.1  christos /* Print error message and exit.  */
   1597  1.1  christos 
   1598  1.1  christos void
   1599  1.1  christos fatal (const char *format, const char *arg)
   1600  1.1  christos {
   1601  1.1  christos   error (format, arg);
   1602  1.1  christos   xexit (1);
   1603  1.1  christos }
   1604  1.1  christos 
   1605  1.1  christos /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
   1606  1.1  christos void
   1607  1.1  christos error (const char *format, const char *arg)
   1608  1.1  christos {
   1609  1.1  christos   printf ("%s: ", program_name);
   1610  1.1  christos   printf (format, arg);
   1611  1.1  christos   if (format[strlen (format) -1] != '\n')
   1612  1.1  christos     printf ("\n");
   1613  1.1  christos }
   1614  1.1  christos 
   1615  1.1  christos void
   1616  1.1  christos perror_with_name (const char *name)
   1617  1.1  christos {
   1618  1.1  christos   fprintf (stderr, "%s: ", program_name);
   1619  1.1  christos   perror (name);
   1620  1.1  christos }
   1621  1.1  christos 
   1622  1.1  christos void
   1623  1.1  christos pfatal_with_name (const char *name)
   1624  1.1  christos {
   1625  1.1  christos   perror_with_name (name);
   1626  1.1  christos   xexit (1);
   1627  1.1  christos }
   1628  1.1  christos 
   1629  1.1  christos 
   1630  1.1  christos /* Return a newly-allocated string concatenating S1 and S2.  */
   1632                
   1633                char *
   1634                concat (char *s1, char *s2)
   1635                {
   1636                  int len1 = strlen (s1), len2 = strlen (s2);
   1637                  char *result = (char *) xmalloc (len1 + len2 + 1);
   1638                
   1639                  strcpy (result, s1);
   1640                  strcpy (result + len1, s2);
   1641                  *(result + len1 + len2) = 0;
   1642                
   1643                  return result;
   1644                }
   1645