Home | History | Annotate | Line # | Download | only in util
texindex.c revision 1.1.1.1
      1 /*	$NetBSD: texindex.c,v 1.1.1.1 2016/01/14 00:11:29 christos Exp $	*/
      2 
      3 /* texindex -- sort TeX index dribble output into an actual index.
      4    Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp
      5 
      6    Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
      7    2002, 2003, 2004 Free Software Foundation, Inc.
      8 
      9    This program is free software; you can redistribute it and/or modify
     10    it under the terms of the GNU General Public License as published by
     11    the Free Software Foundation; either version 2, or (at your option)
     12    any later version.
     13 
     14    This program is distributed in the hope that it will be useful,
     15    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17    GNU General Public License for more details.
     18 
     19    You should have received a copy of the GNU General Public License
     20    along with this program; if not, write to the Free Software
     21    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
     22 
     23 #include "system.h"
     24 #include <getopt.h>
     25 
     26 static char *program_name = "texindex";
     27 
     28 #if defined (emacs)
     29 #  include "../src/config.h"
     30 /* Some s/os.h files redefine these. */
     31 #  undef read
     32 #  undef close
     33 #  undef write
     34 #  undef open
     35 #endif
     36 
     37 #if !defined (HAVE_MEMSET)
     38 #undef memset
     39 #define memset(ptr, ignore, count) bzero (ptr, count)
     40 #endif
     41 
     42 char *mktemp (char *);
     43 
     44 #if !defined (SEEK_SET)
     45 #  define SEEK_SET 0
     46 #  define SEEK_CUR 1
     47 #  define SEEK_END 2
     48 #endif /* !SEEK_SET */
     49 
     50 struct linebuffer;
     51 
     52 /* When sorting in core, this structure describes one line
     53    and the position and length of its first keyfield.  */
     54 struct lineinfo
     55 {
     56   char *text;           /* The actual text of the line. */
     57   union {
     58     char *text;         /* The start of the key (for textual comparison). */
     59     long number;        /* The numeric value (for numeric comparison). */
     60   } key;
     61   long keylen;          /* Length of KEY field. */
     62 };
     63 
     64 /* This structure describes a field to use as a sort key. */
     65 struct keyfield
     66 {
     67   int startwords;       /* Number of words to skip. */
     68   int startchars;       /* Number of additional chars to skip. */
     69   int endwords;         /* Number of words to ignore at end. */
     70   int endchars;         /* Ditto for characters of last word. */
     71   char ignore_blanks;   /* Non-zero means ignore spaces and tabs. */
     72   char fold_case;       /* Non-zero means case doesn't matter. */
     73   char reverse;         /* Non-zero means compare in reverse order. */
     74   char numeric;         /* Non-zeros means field is ASCII numeric. */
     75   char positional;      /* Sort according to file position. */
     76   char braced;          /* Count balanced-braced groupings as fields. */
     77 };
     78 
     79 /* Vector of keyfields to use. */
     80 struct keyfield keyfields[3];
     81 
     82 /* Number of keyfields stored in that vector.  */
     83 int num_keyfields = 3;
     84 
     85 /* Vector of input file names, terminated with a null pointer. */
     86 char **infiles;
     87 
     88 /* Vector of corresponding output file names, or NULL, meaning default it
     89    (add an `s' to the end). */
     90 char **outfiles;
     91 
     92 /* Length of `infiles'. */
     93 int num_infiles;
     94 
     95 /* Pointer to the array of pointers to lines being sorted. */
     96 char **linearray;
     97 
     98 /* The allocated length of `linearray'. */
     99 long nlines;
    100 
    101 /* Directory to use for temporary files.  On Unix, it ends with a slash.  */
    102 char *tempdir;
    103 
    104 /* Number of last temporary file.  */
    105 int tempcount;
    106 
    107 /* Number of last temporary file already deleted.
    108    Temporary files are deleted by `flush_tempfiles' in order of creation.  */
    109 int last_deleted_tempcount;
    110 
    111 /* During in-core sort, this points to the base of the data block
    112    which contains all the lines of data.  */
    113 char *text_base;
    114 
    115 /* Initially 0; changed to 1 if we want initials in this index.  */
    116 int need_initials;
    117 
    118 /* Remembers the first initial letter seen in this index, so we can
    119    determine whether we need initials in the sorted form.  */
    120 char first_initial;
    121 
    122 /* Additional command switches .*/
    123 
    124 /* Nonzero means do not delete tempfiles -- for debugging. */
    125 int keep_tempfiles;
    126 
    127 /* Forward declarations of functions in this file. */
    128 void decode_command (int argc, char **argv);
    129 void sort_in_core (char *infile, int total, char *outfile);
    130 void sort_offline (char *infile, off_t total, char *outfile);
    131 char **parsefile (char *filename, char **nextline, char *data, long int size);
    132 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
    133 char *find_pos (char *str, int words, int chars, int ignore_blanks);
    134 long find_value (char *start, long int length);
    135 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
    136 char *find_braced_end (char *str);
    137 void writelines (char **linearray, int nlines, FILE *ostream);
    138 int compare_field (struct keyfield *keyfield, char *start1,
    139                    long int length1, long int pos1, char *start2,
    140                    long int length2, long int pos2);
    141 int compare_full (const void *, const void *);
    142 long readline (struct linebuffer *linebuffer, FILE *stream);
    143 int merge_files (char **infiles, int nfiles, char *outfile);
    144 int merge_direct (char **infiles, int nfiles, char *outfile);
    145 void pfatal_with_name (const char *name);
    146 void fatal (const char *format, const char *arg);
    147 void error (const char *format, const char *arg);
    148 void *xmalloc (), *xrealloc ();
    149 char *concat (char *s1, char *s2);
    150 void flush_tempfiles (int to_count);
    151 
    152 #define MAX_IN_CORE_SORT 500000
    154 
    155 int
    156 main (int argc, char **argv)
    157 {
    158   int i;
    159 
    160   tempcount = 0;
    161   last_deleted_tempcount = 0;
    162 
    163 #ifdef HAVE_SETLOCALE
    164   /* Set locale via LC_ALL.  */
    165   setlocale (LC_ALL, "");
    166 #endif
    167 
    168   /* Set the text message domain.  */
    169   bindtextdomain (PACKAGE, LOCALEDIR);
    170   textdomain (PACKAGE);
    171 
    172   /* In case we write to a redirected stdout that fails.  */
    173   /* not ready atexit (close_stdout); */
    174 
    175   /* Describe the kind of sorting to do. */
    176   /* The first keyfield uses the first braced field and folds case. */
    177   keyfields[0].braced = 1;
    178   keyfields[0].fold_case = 1;
    179   keyfields[0].endwords = -1;
    180   keyfields[0].endchars = -1;
    181 
    182   /* The second keyfield uses the second braced field, numerically. */
    183   keyfields[1].braced = 1;
    184   keyfields[1].numeric = 1;
    185   keyfields[1].startwords = 1;
    186   keyfields[1].endwords = -1;
    187   keyfields[1].endchars = -1;
    188 
    189   /* The third keyfield (which is ignored while discarding duplicates)
    190      compares the whole line. */
    191   keyfields[2].endwords = -1;
    192   keyfields[2].endchars = -1;
    193 
    194   decode_command (argc, argv);
    195 
    196   /* Process input files completely, one by one.  */
    197 
    198   for (i = 0; i < num_infiles; i++)
    199     {
    200       int desc;
    201       off_t ptr;
    202       char *outfile;
    203       struct stat instat;
    204 
    205       desc = open (infiles[i], O_RDONLY, 0);
    206       if (desc < 0)
    207         pfatal_with_name (infiles[i]);
    208 
    209       if (stat (infiles[i], &instat))
    210         pfatal_with_name (infiles[i]);
    211       if (S_ISDIR (instat.st_mode))
    212         {
    213 #ifdef EISDIR
    214           errno = EISDIR;
    215 #endif
    216           pfatal_with_name (infiles[i]);
    217         }
    218 
    219       lseek (desc, (off_t) 0, SEEK_END);
    220       ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
    221 
    222       close (desc);
    223 
    224       outfile = outfiles[i];
    225       if (!outfile)
    226         outfile = concat (infiles[i], "s");
    227 
    228       need_initials = 0;
    229       first_initial = '\0';
    230 
    231       if (ptr < MAX_IN_CORE_SORT)
    232         /* Sort a small amount of data. */
    233         sort_in_core (infiles[i], (int)ptr, outfile);
    234       else
    235         sort_offline (infiles[i], ptr, outfile);
    236     }
    237 
    238   flush_tempfiles (tempcount);
    239   xexit (0);
    240   return 0; /* Avoid bogus warnings.  */
    241 }
    242 
    243 typedef struct
    245 {
    246   char *long_name;
    247   char *short_name;
    248   int *variable_ref;
    249   int variable_value;
    250   char *arg_name;
    251   char *doc_string;
    252 } TEXINDEX_OPTION;
    253 
    254 TEXINDEX_OPTION texindex_options[] = {
    255   { "--help", "-h", (int *)NULL, 0, (char *)NULL,
    256       N_("display this help and exit") },
    257   { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
    258       N_("keep temporary files around after processing") },
    259   { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
    260       N_("do not keep temporary files around after processing (default)") },
    261   { "--output", "-o", (int *)NULL, 0, "FILE",
    262       N_("send output to FILE") },
    263   { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
    264       N_("display version information and exit") },
    265   { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
    266 };
    267 
    268 void
    269 usage (int result_value)
    270 {
    271   register int i;
    272   FILE *f = result_value ? stderr : stdout;
    273 
    274   fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
    275   fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
    276   /* Avoid trigraph nonsense.  */
    277   fprintf (f,
    278 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
    279            '?', '?'); /* avoid trigraph in cat-id-tbl.c */
    280   fprintf (f, _("\nOptions:\n"));
    281 
    282   for (i = 0; texindex_options[i].long_name; i++)
    283     {
    284       putc (' ', f);
    285 
    286       if (texindex_options[i].short_name)
    287         fprintf (f, "%s, ", texindex_options[i].short_name);
    288 
    289       fprintf (f, "%s %s",
    290                texindex_options[i].long_name,
    291                texindex_options[i].arg_name
    292                ? texindex_options[i].arg_name : "");
    293 
    294       fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
    295     }
    296   fputs (_("\n\
    297 Email bug reports to bug-texinfo (at) gnu.org,\n\
    298 general questions and discussion to help-texinfo (at) gnu.org.\n\
    299 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
    300   fputs ("\n", f);
    301 
    302   xexit (result_value);
    303 }
    304 
    305 /* Decode the command line arguments to set the parameter variables
    306    and set up the vector of keyfields and the vector of input files. */
    307 
    308 void
    309 decode_command (int argc, char **argv)
    310 {
    311   int arg_index = 1;
    312   char **ip;
    313   char **op;
    314 
    315   /* Store default values into parameter variables. */
    316 
    317   tempdir = getenv ("TMPDIR");
    318   if (tempdir == NULL)
    319     tempdir = getenv ("TEMP");
    320   if (tempdir == NULL)
    321     tempdir = getenv ("TMP");
    322   if (tempdir == NULL)
    323     tempdir = DEFAULT_TMPDIR;
    324   else
    325     tempdir = concat (tempdir, "/");
    326 
    327   keep_tempfiles = 0;
    328 
    329   /* Allocate ARGC input files, which must be enough.  */
    330 
    331   infiles = (char **) xmalloc (argc * sizeof (char *));
    332   outfiles = (char **) xmalloc (argc * sizeof (char *));
    333   ip = infiles;
    334   op = outfiles;
    335 
    336   while (arg_index < argc)
    337     {
    338       char *arg = argv[arg_index++];
    339 
    340       if (*arg == '-')
    341         {
    342           if (strcmp (arg, "--version") == 0)
    343             {
    344               printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
    345               puts ("");
    346               puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
    347               printf (_("There is NO warranty.  You may redistribute this software\n\
    348 under the terms of the GNU General Public License.\n\
    349 For more information about these matters, see the files named COPYING.\n"));
    350               xexit (0);
    351             }
    352           else if ((strcmp (arg, "--keep") == 0) ||
    353                    (strcmp (arg, "-k") == 0))
    354             {
    355               keep_tempfiles = 1;
    356             }
    357           else if ((strcmp (arg, "--help") == 0) ||
    358                    (strcmp (arg, "-h") == 0))
    359             {
    360               usage (0);
    361             }
    362           else if ((strcmp (arg, "--output") == 0) ||
    363                    (strcmp (arg, "-o") == 0))
    364             {
    365               if (argv[arg_index] != (char *)NULL)
    366                 {
    367                   arg_index++;
    368                   if (op > outfiles)
    369                     *(op - 1) = argv[arg_index];
    370                 }
    371               else
    372                 usage (1);
    373             }
    374           else
    375             usage (1);
    376         }
    377       else
    378         {
    379           *ip++ = arg;
    380           *op++ = (char *)NULL;
    381         }
    382     }
    383 
    384   /* Record number of keyfields and terminate list of filenames. */
    385   num_infiles = ip - infiles;
    386   *ip = (char *)NULL;
    387   if (num_infiles == 0)
    388     usage (1);
    389 }
    390 
    391 /* Return a name for temporary file COUNT. */
    393 
    394 static char *
    395 maketempname (int count)
    396 {
    397   static char *tempbase = NULL;
    398   char tempsuffix[10];
    399 
    400   if (!tempbase)
    401     {
    402       int fd;
    403       tempbase = concat (tempdir, "txidxXXXXXX");
    404 
    405       fd = mkstemp (tempbase);
    406       if (fd == -1)
    407         pfatal_with_name (tempbase);
    408     }
    409 
    410   sprintf (tempsuffix, ".%d", count);
    411   return concat (tempbase, tempsuffix);
    412 }
    413 
    414 
    415 /* Delete all temporary files up to TO_COUNT. */
    416 
    417 void
    418 flush_tempfiles (int to_count)
    419 {
    420   if (keep_tempfiles)
    421     return;
    422   while (last_deleted_tempcount < to_count)
    423     unlink (maketempname (++last_deleted_tempcount));
    424 }
    425 
    426 
    427 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
    429 
    430 int
    431 compare_full (const void *p1, const void *p2)
    432 {
    433   char **line1 = (char **) p1;
    434   char **line2 = (char **) p2;
    435   int i;
    436 
    437   /* Compare using the first keyfield;
    438      if that does not distinguish the lines, try the second keyfield;
    439      and so on. */
    440 
    441   for (i = 0; i < num_keyfields; i++)
    442     {
    443       long length1, length2;
    444       char *start1 = find_field (&keyfields[i], *line1, &length1);
    445       char *start2 = find_field (&keyfields[i], *line2, &length2);
    446       int tem = compare_field (&keyfields[i], start1, length1,
    447                                *line1 - text_base,
    448                                start2, length2, *line2 - text_base);
    449       if (tem)
    450         {
    451           if (keyfields[i].reverse)
    452             return -tem;
    453           return tem;
    454         }
    455     }
    456 
    457   return 0;                     /* Lines match exactly. */
    458 }
    459 
    460 /* Compare LINE1 and LINE2, described by structures
    461    in which the first keyfield is identified in advance.
    462    For positional sorting, assumes that the order of the lines in core
    463    reflects their nominal order.  */
    464 int
    465 compare_prepared (const void *p1, const void *p2)
    466 {
    467   struct lineinfo *line1 = (struct lineinfo *) p1;
    468   struct lineinfo *line2 = (struct lineinfo *) p2;
    469   int i;
    470   int tem;
    471   char *text1, *text2;
    472 
    473   /* Compare using the first keyfield, which has been found for us already. */
    474   if (keyfields->positional)
    475     {
    476       if (line1->text - text_base > line2->text - text_base)
    477         tem = 1;
    478       else
    479         tem = -1;
    480     }
    481   else if (keyfields->numeric)
    482     tem = line1->key.number - line2->key.number;
    483   else
    484     tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
    485                          line2->key.text, line2->keylen, 0);
    486   if (tem)
    487     {
    488       if (keyfields->reverse)
    489         return -tem;
    490       return tem;
    491     }
    492 
    493   text1 = line1->text;
    494   text2 = line2->text;
    495 
    496   /* Compare using the second keyfield;
    497      if that does not distinguish the lines, try the third keyfield;
    498      and so on. */
    499 
    500   for (i = 1; i < num_keyfields; i++)
    501     {
    502       long length1, length2;
    503       char *start1 = find_field (&keyfields[i], text1, &length1);
    504       char *start2 = find_field (&keyfields[i], text2, &length2);
    505       int tem = compare_field (&keyfields[i], start1, length1,
    506                                text1 - text_base,
    507                                start2, length2, text2 - text_base);
    508       if (tem)
    509         {
    510           if (keyfields[i].reverse)
    511             return -tem;
    512           return tem;
    513         }
    514     }
    515 
    516   return 0;                     /* Lines match exactly. */
    517 }
    518 
    519 /* Like compare_full but more general.
    520    You can pass any strings, and you can say how many keyfields to use.
    521    POS1 and POS2 should indicate the nominal positional ordering of
    522    the two lines in the input.  */
    523 
    524 int
    525 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
    526 {
    527   int i;
    528 
    529   /* Compare using the first keyfield;
    530      if that does not distinguish the lines, try the second keyfield;
    531      and so on. */
    532 
    533   for (i = 0; i < use_keyfields; i++)
    534     {
    535       long length1, length2;
    536       char *start1 = find_field (&keyfields[i], str1, &length1);
    537       char *start2 = find_field (&keyfields[i], str2, &length2);
    538       int tem = compare_field (&keyfields[i], start1, length1, pos1,
    539                                start2, length2, pos2);
    540       if (tem)
    541         {
    542           if (keyfields[i].reverse)
    543             return -tem;
    544           return tem;
    545         }
    546     }
    547 
    548   return 0;                     /* Lines match exactly. */
    549 }
    550 
    551 /* Find the start and length of a field in STR according to KEYFIELD.
    552    A pointer to the starting character is returned, and the length
    553    is stored into the int that LENGTHPTR points to.  */
    554 
    555 char *
    556 find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
    557 {
    558   char *start;
    559   char *end;
    560   char *(*fun) ();
    561 
    562   if (keyfield->braced)
    563     fun = find_braced_pos;
    564   else
    565     fun = find_pos;
    566 
    567   start = (*fun) (str, keyfield->startwords, keyfield->startchars,
    568                   keyfield->ignore_blanks);
    569   if (keyfield->endwords < 0)
    570     {
    571       if (keyfield->braced)
    572         end = find_braced_end (start);
    573       else
    574         {
    575           end = start;
    576           while (*end && *end != '\n')
    577             end++;
    578         }
    579     }
    580   else
    581     {
    582       end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
    583       if (end - str < start - str)
    584         end = start;
    585     }
    586   *lengthptr = end - start;
    587   return start;
    588 }
    589 
    590 /* Return a pointer to a specified place within STR,
    591    skipping (from the beginning) WORDS words and then CHARS chars.
    592    If IGNORE_BLANKS is nonzero, we skip all blanks
    593    after finding the specified word.  */
    594 
    595 char *
    596 find_pos (char *str, int words, int chars, int ignore_blanks)
    597 {
    598   int i;
    599   char *p = str;
    600 
    601   for (i = 0; i < words; i++)
    602     {
    603       char c;
    604       /* Find next bunch of nonblanks and skip them. */
    605       while ((c = *p) == ' ' || c == '\t')
    606         p++;
    607       while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
    608         p++;
    609       if (!*p || *p == '\n')
    610         return p;
    611     }
    612 
    613   while (*p == ' ' || *p == '\t')
    614     p++;
    615 
    616   for (i = 0; i < chars; i++)
    617     {
    618       if (!*p || *p == '\n')
    619         break;
    620       p++;
    621     }
    622   return p;
    623 }
    624 
    625 /* Like find_pos but assumes that each field is surrounded by braces
    626    and that braces within fields are balanced. */
    627 
    628 char *
    629 find_braced_pos (char *str, int words, int chars, int ignore_blanks)
    630 {
    631   int i;
    632   int bracelevel;
    633   char *p = str;
    634   char c;
    635 
    636   for (i = 0; i < words; i++)
    637     {
    638       bracelevel = 1;
    639       while ((c = *p++) != '{' && c != '\n' && c)
    640         /* Do nothing. */ ;
    641       if (c != '{')
    642         return p - 1;
    643       while (bracelevel)
    644         {
    645           c = *p++;
    646           if (c == '{')
    647             bracelevel++;
    648           if (c == '}')
    649             bracelevel--;
    650           if (c == 0 || c == '\n')
    651             return p - 1;
    652         }
    653     }
    654 
    655   while ((c = *p++) != '{' && c != '\n' && c)
    656     /* Do nothing. */ ;
    657 
    658   if (c != '{')
    659     return p - 1;
    660 
    661   if (ignore_blanks)
    662     while ((c = *p) == ' ' || c == '\t')
    663       p++;
    664 
    665   for (i = 0; i < chars; i++)
    666     {
    667       if (!*p || *p == '\n')
    668         break;
    669       p++;
    670     }
    671   return p;
    672 }
    673 
    674 /* Find the end of the balanced-brace field which starts at STR.
    675    The position returned is just before the closing brace. */
    676 
    677 char *
    678 find_braced_end (char *str)
    679 {
    680   int bracelevel;
    681   char *p = str;
    682   char c;
    683 
    684   bracelevel = 1;
    685   while (bracelevel)
    686     {
    687       c = *p++;
    688       if (c == '{')
    689         bracelevel++;
    690       if (c == '}')
    691         bracelevel--;
    692       if (c == 0 || c == '\n')
    693         return p - 1;
    694     }
    695   return p - 1;
    696 }
    697 
    698 long
    699 find_value (char *start, long int length)
    700 {
    701   while (length != 0L)
    702     {
    703       if (isdigit (*start))
    704         return atol (start);
    705       length--;
    706       start++;
    707     }
    708   return 0l;
    709 }
    710 
    711 /* Vector used to translate characters for comparison.
    712    This is how we make all alphanumerics follow all else,
    713    and ignore case in the first sorting.  */
    714 int char_order[256];
    715 
    716 void
    717 init_char_order (void)
    718 {
    719   int i;
    720   for (i = 1; i < 256; i++)
    721     char_order[i] = i;
    722 
    723   for (i = '0'; i <= '9'; i++)
    724     char_order[i] += 512;
    725 
    726   for (i = 'a'; i <= 'z'; i++)
    727     {
    728       char_order[i] = 512 + i;
    729       char_order[i + 'A' - 'a'] = 512 + i;
    730     }
    731 }
    732 
    733 /* Compare two fields (each specified as a start pointer and a character count)
    734    according to KEYFIELD.
    735    The sign of the value reports the relation between the fields. */
    736 
    737 int
    738 compare_field (struct keyfield *keyfield, char *start1, long int length1,
    739                long int pos1, char *start2, long int length2, long int pos2)
    740 {
    741   if (keyfields->positional)
    742     {
    743       if (pos1 > pos2)
    744         return 1;
    745       else
    746         return -1;
    747     }
    748   if (keyfield->numeric)
    749     {
    750       long value = find_value (start1, length1) - find_value (start2, length2);
    751       if (value > 0)
    752         return 1;
    753       if (value < 0)
    754         return -1;
    755       return 0;
    756     }
    757   else
    758     {
    759       char *p1 = start1;
    760       char *p2 = start2;
    761       char *e1 = start1 + length1;
    762       char *e2 = start2 + length2;
    763 
    764       while (1)
    765         {
    766           int c1, c2;
    767 
    768           if (p1 == e1)
    769             c1 = 0;
    770           else
    771             c1 = *p1++;
    772           if (p2 == e2)
    773             c2 = 0;
    774           else
    775             c2 = *p2++;
    776 
    777           if (char_order[c1] != char_order[c2])
    778             return char_order[c1] - char_order[c2];
    779           if (!c1)
    780             break;
    781         }
    782 
    783       /* Strings are equal except possibly for case.  */
    784       p1 = start1;
    785       p2 = start2;
    786       while (1)
    787         {
    788           int c1, c2;
    789 
    790           if (p1 == e1)
    791             c1 = 0;
    792           else
    793             c1 = *p1++;
    794           if (p2 == e2)
    795             c2 = 0;
    796           else
    797             c2 = *p2++;
    798 
    799           if (c1 != c2)
    800             /* Reverse sign here so upper case comes out last.  */
    801             return c2 - c1;
    802           if (!c1)
    803             break;
    804         }
    805 
    806       return 0;
    807     }
    808 }
    809 
    810 /* A `struct linebuffer' is a structure which holds a line of text.
    812    `readline' reads a line from a stream into a linebuffer
    813    and works regardless of the length of the line.  */
    814 
    815 struct linebuffer
    816 {
    817   long size;
    818   char *buffer;
    819 };
    820 
    821 /* Initialize LINEBUFFER for use. */
    822 
    823 void
    824 initbuffer (struct linebuffer *linebuffer)
    825 {
    826   linebuffer->size = 200;
    827   linebuffer->buffer = (char *) xmalloc (200);
    828 }
    829 
    830 /* Read a line of text from STREAM into LINEBUFFER.
    831    Return the length of the line.  */
    832 
    833 long
    834 readline (struct linebuffer *linebuffer, FILE *stream)
    835 {
    836   char *buffer = linebuffer->buffer;
    837   char *p = linebuffer->buffer;
    838   char *end = p + linebuffer->size;
    839 
    840   while (1)
    841     {
    842       int c = getc (stream);
    843       if (p == end)
    844         {
    845           buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
    846           p += buffer - linebuffer->buffer;
    847           end += buffer - linebuffer->buffer;
    848           linebuffer->buffer = buffer;
    849         }
    850       if (c < 0 || c == '\n')
    851         {
    852           *p = 0;
    853           break;
    854         }
    855       *p++ = c;
    856     }
    857 
    858   return p - buffer;
    859 }
    860 
    861 /* Sort an input file too big to sort in core.  */
    863 
    864 void
    865 sort_offline (char *infile, off_t total, char *outfile)
    866 {
    867   /* More than enough. */
    868   int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
    869   char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
    870   FILE *istream = fopen (infile, "r");
    871   int i;
    872   struct linebuffer lb;
    873   long linelength;
    874   int failure = 0;
    875 
    876   initbuffer (&lb);
    877 
    878   /* Read in one line of input data.  */
    879 
    880   linelength = readline (&lb, istream);
    881 
    882   if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
    883     {
    884       error (_("%s: not a texinfo index file"), infile);
    885       return;
    886     }
    887 
    888   /* Split up the input into `ntemps' temporary files, or maybe fewer,
    889      and put the new files' names into `tempfiles' */
    890 
    891   for (i = 0; i < ntemps; i++)
    892     {
    893       char *outname = maketempname (++tempcount);
    894       FILE *ostream = fopen (outname, "w");
    895       long tempsize = 0;
    896 
    897       if (!ostream)
    898         pfatal_with_name (outname);
    899       tempfiles[i] = outname;
    900 
    901       /* Copy lines into this temp file as long as it does not make file
    902          "too big" or until there are no more lines.  */
    903 
    904       while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
    905         {
    906           tempsize += linelength + 1;
    907           fputs (lb.buffer, ostream);
    908           putc ('\n', ostream);
    909 
    910           /* Read another line of input data.  */
    911 
    912           linelength = readline (&lb, istream);
    913           if (!linelength && feof (istream))
    914             break;
    915 
    916           if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
    917             {
    918               error (_("%s: not a texinfo index file"), infile);
    919               failure = 1;
    920               goto fail;
    921             }
    922         }
    923       fclose (ostream);
    924       if (feof (istream))
    925         break;
    926     }
    927 
    928   free (lb.buffer);
    929 
    930 fail:
    931   /* Record number of temp files we actually needed.  */
    932 
    933   ntemps = i;
    934 
    935   /* Sort each tempfile into another tempfile.
    936     Delete the first set of tempfiles and put the names of the second
    937     into `tempfiles'. */
    938 
    939   for (i = 0; i < ntemps; i++)
    940     {
    941       char *newtemp = maketempname (++tempcount);
    942       sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp);
    943       if (!keep_tempfiles)
    944         unlink (tempfiles[i]);
    945       tempfiles[i] = newtemp;
    946     }
    947 
    948   if (failure)
    949     return;
    950 
    951   /* Merge the tempfiles together and indexify. */
    952 
    953   merge_files (tempfiles, ntemps, outfile);
    954 }
    955 
    956 /* Sort INFILE, whose size is TOTAL,
    958    assuming that is small enough to be done in-core,
    959    then indexify it and send the output to OUTFILE (or to stdout).  */
    960 
    961 void
    962 sort_in_core (char *infile, int total, char *outfile)
    963 {
    964   char **nextline;
    965   char *data = (char *) xmalloc (total + 1);
    966   char *file_data;
    967   long file_size;
    968   int i;
    969   FILE *ostream = stdout;
    970   struct lineinfo *lineinfo;
    971 
    972   /* Read the contents of the file into the moby array `data'. */
    973 
    974   int desc = open (infile, O_RDONLY, 0);
    975 
    976   if (desc < 0)
    977     fatal (_("failure reopening %s"), infile);
    978   for (file_size = 0;;)
    979     {
    980       i = read (desc, data + file_size, total - file_size);
    981       if (i <= 0)
    982         break;
    983       file_size += i;
    984     }
    985   file_data = data;
    986   data[file_size] = 0;
    987 
    988   close (desc);
    989 
    990   if (file_size > 0 && data[0] != '\\' && data[0] != '@')
    991     {
    992       error (_("%s: not a texinfo index file"), infile);
    993       return;
    994     }
    995 
    996   init_char_order ();
    997 
    998   /* Sort routines want to know this address. */
    999 
   1000   text_base = data;
   1001 
   1002   /* Create the array of pointers to lines, with a default size
   1003      frequently enough.  */
   1004 
   1005   nlines = total / 50;
   1006   if (!nlines)
   1007     nlines = 2;
   1008   linearray = (char **) xmalloc (nlines * sizeof (char *));
   1009 
   1010   /* `nextline' points to the next free slot in this array.
   1011      `nlines' is the allocated size.  */
   1012 
   1013   nextline = linearray;
   1014 
   1015   /* Parse the input file's data, and make entries for the lines.  */
   1016 
   1017   nextline = parsefile (infile, nextline, file_data, file_size);
   1018   if (nextline == 0)
   1019     {
   1020       error (_("%s: not a texinfo index file"), infile);
   1021       return;
   1022     }
   1023 
   1024   /* Sort the lines. */
   1025 
   1026   /* If we have enough space, find the first keyfield of each line in advance.
   1027      Make a `struct lineinfo' for each line, which records the keyfield
   1028      as well as the line, and sort them.  */
   1029 
   1030   lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
   1031 
   1032   if (lineinfo)
   1033     {
   1034       struct lineinfo *lp;
   1035       char **p;
   1036 
   1037       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
   1038         {
   1039           lp->text = *p;
   1040           lp->key.text = find_field (keyfields, *p, &lp->keylen);
   1041           if (keyfields->numeric)
   1042             lp->key.number = find_value (lp->key.text, lp->keylen);
   1043         }
   1044 
   1045       qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
   1046              compare_prepared);
   1047 
   1048       for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
   1049         *p = lp->text;
   1050 
   1051       free (lineinfo);
   1052     }
   1053   else
   1054     qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
   1055 
   1056   /* Open the output file. */
   1057 
   1058   if (outfile)
   1059     {
   1060       ostream = fopen (outfile, "w");
   1061       if (!ostream)
   1062         pfatal_with_name (outfile);
   1063     }
   1064 
   1065   writelines (linearray, nextline - linearray, ostream);
   1066   if (outfile)
   1067     fclose (ostream);
   1068 
   1069   free (linearray);
   1070   free (data);
   1071 }
   1072 
   1073 /* Parse an input string in core into lines.
   1075    DATA is the input string, and SIZE is its length.
   1076    Data goes in LINEARRAY starting at NEXTLINE.
   1077    The value returned is the first entry in LINEARRAY still unused.
   1078    Value 0 means input file contents are invalid.  */
   1079 
   1080 char **
   1081 parsefile (char *filename, char **nextline, char *data, long int size)
   1082 {
   1083   char *p, *end;
   1084   char **line = nextline;
   1085 
   1086   p = data;
   1087   end = p + size;
   1088   *end = 0;
   1089 
   1090   while (p != end)
   1091     {
   1092       if (p[0] != '\\' && p[0] != '@')
   1093         return 0;
   1094 
   1095       *line = p;
   1096 
   1097       /* Find the first letter of the first field of this line.  If it
   1098          is different from the first letter of the first field of the
   1099          first line, we need initial headers in the output index.  */
   1100       while (*p && *p != '{')
   1101         p++;
   1102       if (p == end)
   1103         return 0;
   1104       p++;
   1105       if (first_initial)
   1106         {
   1107           if (first_initial != toupper (*p))
   1108             need_initials = 1;
   1109         }
   1110       else
   1111         first_initial = toupper (*p);
   1112 
   1113       while (*p && *p != '\n')
   1114         p++;
   1115       if (p != end)
   1116         p++;
   1117 
   1118       line++;
   1119       if (line == linearray + nlines)
   1120         {
   1121           char **old = linearray;
   1122           linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
   1123           line += linearray - old;
   1124         }
   1125     }
   1126 
   1127   return line;
   1128 }
   1129 
   1130 /* Indexification is a filter applied to the sorted lines
   1132    as they are being written to the output file.
   1133    Multiple entries for the same name, with different page numbers,
   1134    get combined into a single entry with multiple page numbers.
   1135    The first braced field, which is used for sorting, is discarded.
   1136    However, its first character is examined, folded to lower case,
   1137    and if it is different from that in the previous line fed to us
   1138    a \initial line is written with one argument, the new initial.
   1139 
   1140    If an entry has four braced fields, then the second and third
   1141    constitute primary and secondary names.
   1142    In this case, each change of primary name
   1143    generates a \primary line which contains only the primary name,
   1144    and in between these are \secondary lines which contain
   1145    just a secondary name and page numbers. */
   1146 
   1147 /* The last primary name we wrote a \primary entry for.
   1148    If only one level of indexing is being done, this is the last name seen. */
   1149 char *lastprimary;
   1150 /* Length of storage allocated for lastprimary. */
   1151 int lastprimarylength;
   1152 
   1153 /* Similar, for the secondary name. */
   1154 char *lastsecondary;
   1155 int lastsecondarylength;
   1156 
   1157 /* Zero if we are not in the middle of writing an entry.
   1158    One if we have written the beginning of an entry but have not
   1159    yet written any page numbers into it.
   1160    Greater than one if we have written the beginning of an entry
   1161    plus at least one page number. */
   1162 int pending;
   1163 
   1164 /* The initial (for sorting purposes) of the last primary entry written.
   1165    When this changes, a \initial {c} line is written */
   1166 
   1167 char *lastinitial;
   1168 
   1169 int lastinitiallength;
   1170 
   1171 /* When we need a string of length 1 for the value of lastinitial,
   1172    store it here.  */
   1173 
   1174 char lastinitial1[2];
   1175 
   1176 /* Initialize static storage for writing an index. */
   1177 
   1178 void
   1179 init_index (void)
   1180 {
   1181   pending = 0;
   1182   lastinitial = lastinitial1;
   1183   lastinitial1[0] = 0;
   1184   lastinitial1[1] = 0;
   1185   lastinitiallength = 0;
   1186   lastprimarylength = 100;
   1187   lastprimary = (char *) xmalloc (lastprimarylength + 1);
   1188   memset (lastprimary, '\0', lastprimarylength + 1);
   1189   lastsecondarylength = 100;
   1190   lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
   1191   memset (lastsecondary, '\0', lastsecondarylength + 1);
   1192 }
   1193 
   1194 /* Indexify.  Merge entries for the same name,
   1195    insert headers for each initial character, etc.  */
   1196 
   1197 void
   1198 indexify (char *line, FILE *ostream)
   1199 {
   1200   char *primary, *secondary, *pagenumber;
   1201   int primarylength, secondarylength = 0, pagelength;
   1202   int nosecondary;
   1203   int initiallength;
   1204   char *initial;
   1205   char initial1[2];
   1206   register char *p;
   1207 
   1208   /* First, analyze the parts of the entry fed to us this time. */
   1209 
   1210   p = find_braced_pos (line, 0, 0, 0);
   1211   if (*p == '{')
   1212     {
   1213       initial = p;
   1214       /* Get length of inner pair of braces starting at `p',
   1215          including that inner pair of braces.  */
   1216       initiallength = find_braced_end (p + 1) + 1 - p;
   1217     }
   1218   else
   1219     {
   1220       initial = initial1;
   1221       initial1[0] = toupper (*p);
   1222       initial1[1] = 0;
   1223       initiallength = 1;
   1224     }
   1225 
   1226   pagenumber = find_braced_pos (line, 1, 0, 0);
   1227   pagelength = find_braced_end (pagenumber) - pagenumber;
   1228   if (pagelength == 0)
   1229     fatal (_("No page number in %s"), line);
   1230 
   1231   primary = find_braced_pos (line, 2, 0, 0);
   1232   primarylength = find_braced_end (primary) - primary;
   1233 
   1234   secondary = find_braced_pos (line, 3, 0, 0);
   1235   nosecondary = !*secondary;
   1236   if (!nosecondary)
   1237     secondarylength = find_braced_end (secondary) - secondary;
   1238 
   1239   /* If the primary is different from before, make a new primary entry. */
   1240   if (strncmp (primary, lastprimary, primarylength))
   1241     {
   1242       /* Close off current secondary entry first, if one is open. */
   1243       if (pending)
   1244         {
   1245           fputs ("}\n", ostream);
   1246           pending = 0;
   1247         }
   1248 
   1249       /* If this primary has a different initial, include an entry for
   1250          the initial. */
   1251       if (need_initials &&
   1252           (initiallength != lastinitiallength ||
   1253            strncmp (initial, lastinitial, initiallength)))
   1254         {
   1255           fprintf (ostream, "\\initial {");
   1256           fwrite (initial, 1, initiallength, ostream);
   1257           fputs ("}\n", ostream);
   1258           if (initial == initial1)
   1259             {
   1260               lastinitial = lastinitial1;
   1261               *lastinitial1 = *initial1;
   1262             }
   1263           else
   1264             {
   1265               lastinitial = initial;
   1266             }
   1267           lastinitiallength = initiallength;
   1268         }
   1269 
   1270       /* Make the entry for the primary.  */
   1271       if (nosecondary)
   1272         fputs ("\\entry {", ostream);
   1273       else
   1274         fputs ("\\primary {", ostream);
   1275       fwrite (primary, primarylength, 1, ostream);
   1276       if (nosecondary)
   1277         {
   1278           fputs ("}{", ostream);
   1279           pending = 1;
   1280         }
   1281       else
   1282         fputs ("}\n", ostream);
   1283 
   1284       /* Record name of most recent primary. */
   1285       if (lastprimarylength < primarylength)
   1286         {
   1287           lastprimarylength = primarylength + 100;
   1288           lastprimary = (char *) xrealloc (lastprimary,
   1289                                            1 + lastprimarylength);
   1290         }
   1291       strncpy (lastprimary, primary, primarylength);
   1292       lastprimary[primarylength] = 0;
   1293 
   1294       /* There is no current secondary within this primary, now. */
   1295       lastsecondary[0] = 0;
   1296     }
   1297 
   1298   /* Should not have an entry with no subtopic following one with a
   1299      subtopic. */
   1300 
   1301   if (nosecondary && *lastsecondary)
   1302     error (_("entry %s follows an entry with a secondary name"), line);
   1303 
   1304   /* Start a new secondary entry if necessary. */
   1305   if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
   1306     {
   1307       if (pending)
   1308         {
   1309           fputs ("}\n", ostream);
   1310           pending = 0;
   1311         }
   1312 
   1313       /* Write the entry for the secondary.  */
   1314       fputs ("\\secondary {", ostream);
   1315       fwrite (secondary, secondarylength, 1, ostream);
   1316       fputs ("}{", ostream);
   1317       pending = 1;
   1318 
   1319       /* Record name of most recent secondary. */
   1320       if (lastsecondarylength < secondarylength)
   1321         {
   1322           lastsecondarylength = secondarylength + 100;
   1323           lastsecondary = (char *) xrealloc (lastsecondary,
   1324                                              1 + lastsecondarylength);
   1325         }
   1326       strncpy (lastsecondary, secondary, secondarylength);
   1327       lastsecondary[secondarylength] = 0;
   1328     }
   1329 
   1330   /* Here to add one more page number to the current entry. */
   1331   if (pending++ != 1)
   1332     fputs (", ", ostream);  /* Punctuate first, if this is not the first. */
   1333   fwrite (pagenumber, pagelength, 1, ostream);
   1334 }
   1335 
   1336 /* Close out any unfinished output entry. */
   1337 
   1338 void
   1339 finish_index (FILE *ostream)
   1340 {
   1341   if (pending)
   1342     fputs ("}\n", ostream);
   1343   free (lastprimary);
   1344   free (lastsecondary);
   1345 }
   1346 
   1347 /* Copy the lines in the sorted order.
   1349    Each line is copied out of the input file it was found in. */
   1350 
   1351 void
   1352 writelines (char **linearray, int nlines, FILE *ostream)
   1353 {
   1354   char **stop_line = linearray + nlines;
   1355   char **next_line;
   1356 
   1357   init_index ();
   1358 
   1359   /* Output the text of the lines, and free the buffer space. */
   1360 
   1361   for (next_line = linearray; next_line != stop_line; next_line++)
   1362     {
   1363       /* If -u was specified, output the line only if distinct from
   1364          previous one.  */
   1365       if (next_line == linearray
   1366       /* Compare previous line with this one, using only the
   1367          explicitly specd keyfields. */
   1368           || compare_general (*(next_line - 1), *next_line, 0L, 0L,
   1369                               num_keyfields - 1))
   1370         {
   1371           char *p = *next_line;
   1372           char c;
   1373 
   1374           while ((c = *p++) && c != '\n')
   1375             /* Do nothing. */ ;
   1376           *(p - 1) = 0;
   1377           indexify (*next_line, ostream);
   1378         }
   1379     }
   1380 
   1381   finish_index (ostream);
   1382 }
   1383 
   1384 /* Assume (and optionally verify) that each input file is sorted;
   1386    merge them and output the result.
   1387    Returns nonzero if any input file fails to be sorted.
   1388 
   1389    This is the high-level interface that can handle an unlimited
   1390    number of files.  */
   1391 
   1392 #define MAX_DIRECT_MERGE 10
   1393 
   1394 int
   1395 merge_files (char **infiles, int nfiles, char *outfile)
   1396 {
   1397   char **tempfiles;
   1398   int ntemps;
   1399   int i;
   1400   int value = 0;
   1401   int start_tempcount = tempcount;
   1402 
   1403   if (nfiles <= MAX_DIRECT_MERGE)
   1404     return merge_direct (infiles, nfiles, outfile);
   1405 
   1406   /* Merge groups of MAX_DIRECT_MERGE input files at a time,
   1407      making a temporary file to hold each group's result.  */
   1408 
   1409   ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
   1410   tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
   1411   for (i = 0; i < ntemps; i++)
   1412     {
   1413       int nf = MAX_DIRECT_MERGE;
   1414       if (i + 1 == ntemps)
   1415         nf = nfiles - i * MAX_DIRECT_MERGE;
   1416       tempfiles[i] = maketempname (++tempcount);
   1417       value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
   1418     }
   1419 
   1420   /* All temporary files that existed before are no longer needed
   1421      since their contents have been merged into our new tempfiles.
   1422      So delete them.  */
   1423   flush_tempfiles (start_tempcount);
   1424 
   1425   /* Now merge the temporary files we created.  */
   1426 
   1427   merge_files (tempfiles, ntemps, outfile);
   1428 
   1429   free (tempfiles);
   1430 
   1431   return value;
   1432 }
   1433 
   1434 /* Assume (and optionally verify) that each input file is sorted;
   1436    merge them and output the result.
   1437    Returns nonzero if any input file fails to be sorted.
   1438 
   1439    This version of merging will not work if the number of
   1440    input files gets too high.  Higher level functions
   1441    use it only with a bounded number of input files.  */
   1442 
   1443 int
   1444 merge_direct (char **infiles, int nfiles, char *outfile)
   1445 {
   1446   struct linebuffer *lb1, *lb2;
   1447   struct linebuffer **thisline, **prevline;
   1448   FILE **streams;
   1449   int i;
   1450   int nleft;
   1451   int lossage = 0;
   1452   int *file_lossage;
   1453   struct linebuffer *prev_out = 0;
   1454   FILE *ostream = stdout;
   1455 
   1456   if (outfile)
   1457     {
   1458       ostream = fopen (outfile, "w");
   1459     }
   1460   if (!ostream)
   1461     pfatal_with_name (outfile);
   1462 
   1463   init_index ();
   1464 
   1465   if (nfiles == 0)
   1466     {
   1467       if (outfile)
   1468         fclose (ostream);
   1469       return 0;
   1470     }
   1471 
   1472   /* For each file, make two line buffers.  Also, for each file, there
   1473      is an element of `thisline' which points at any time to one of the
   1474      file's two buffers, and an element of `prevline' which points to
   1475      the other buffer.  `thisline' is supposed to point to the next
   1476      available line from the file, while `prevline' holds the last file
   1477      line used, which is remembered so that we can verify that the file
   1478      is properly sorted. */
   1479 
   1480   /* lb1 and lb2 contain one buffer each per file. */
   1481   lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
   1482   lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
   1483 
   1484   /* thisline[i] points to the linebuffer holding the next available
   1485      line in file i, or is zero if there are no lines left in that file.  */
   1486   thisline = (struct linebuffer **)
   1487     xmalloc (nfiles * sizeof (struct linebuffer *));
   1488   /* prevline[i] points to the linebuffer holding the last used line
   1489      from file i.  This is just for verifying that file i is properly
   1490      sorted.  */
   1491   prevline = (struct linebuffer **)
   1492     xmalloc (nfiles * sizeof (struct linebuffer *));
   1493   /* streams[i] holds the input stream for file i.  */
   1494   streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
   1495   /* file_lossage[i] is nonzero if we already know file i is not
   1496      properly sorted.  */
   1497   file_lossage = (int *) xmalloc (nfiles * sizeof (int));
   1498 
   1499   /* Allocate and initialize all that storage. */
   1500 
   1501   for (i = 0; i < nfiles; i++)
   1502     {
   1503       initbuffer (&lb1[i]);
   1504       initbuffer (&lb2[i]);
   1505       thisline[i] = &lb1[i];
   1506       prevline[i] = &lb2[i];
   1507       file_lossage[i] = 0;
   1508       streams[i] = fopen (infiles[i], "r");
   1509       if (!streams[i])
   1510         pfatal_with_name (infiles[i]);
   1511 
   1512       readline (thisline[i], streams[i]);
   1513     }
   1514 
   1515   /* Keep count of number of files not at eof. */
   1516   nleft = nfiles;
   1517 
   1518   while (nleft)
   1519     {
   1520       struct linebuffer *best = 0;
   1521       struct linebuffer *exch;
   1522       int bestfile = -1;
   1523       int i;
   1524 
   1525       /* Look at the next avail line of each file; choose the least one.  */
   1526 
   1527       for (i = 0; i < nfiles; i++)
   1528         {
   1529           if (thisline[i] &&
   1530               (!best ||
   1531                0 < compare_general (best->buffer, thisline[i]->buffer,
   1532                                  (long) bestfile, (long) i, num_keyfields)))
   1533             {
   1534               best = thisline[i];
   1535               bestfile = i;
   1536             }
   1537         }
   1538 
   1539       /* Output that line, unless it matches the previous one and we
   1540          don't want duplicates. */
   1541 
   1542       if (!(prev_out &&
   1543             !compare_general (prev_out->buffer,
   1544                               best->buffer, 0L, 1L, num_keyfields - 1)))
   1545         indexify (best->buffer, ostream);
   1546       prev_out = best;
   1547 
   1548       /* Now make the line the previous of its file, and fetch a new
   1549          line from that file.  */
   1550 
   1551       exch = prevline[bestfile];
   1552       prevline[bestfile] = thisline[bestfile];
   1553       thisline[bestfile] = exch;
   1554 
   1555       while (1)
   1556         {
   1557           /* If the file has no more, mark it empty. */
   1558 
   1559           if (feof (streams[bestfile]))
   1560             {
   1561               thisline[bestfile] = 0;
   1562               /* Update the number of files still not empty. */
   1563               nleft--;
   1564               break;
   1565             }
   1566           readline (thisline[bestfile], streams[bestfile]);
   1567           if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
   1568             break;
   1569         }
   1570     }
   1571 
   1572   finish_index (ostream);
   1573 
   1574   /* Free all storage and close all input streams. */
   1575 
   1576   for (i = 0; i < nfiles; i++)
   1577     {
   1578       fclose (streams[i]);
   1579       free (lb1[i].buffer);
   1580       free (lb2[i].buffer);
   1581     }
   1582   free (file_lossage);
   1583   free (lb1);
   1584   free (lb2);
   1585   free (thisline);
   1586   free (prevline);
   1587   free (streams);
   1588 
   1589   if (outfile)
   1590     fclose (ostream);
   1591 
   1592   return lossage;
   1593 }
   1594 
   1595 /* Print error message and exit.  */
   1597 
   1598 void
   1599 fatal (const char *format, const char *arg)
   1600 {
   1601   error (format, arg);
   1602   xexit (1);
   1603 }
   1604 
   1605 /* Print error message.  FORMAT is printf control string, ARG is arg for it. */
   1606 void
   1607 error (const char *format, const char *arg)
   1608 {
   1609   printf ("%s: ", program_name);
   1610   printf (format, arg);
   1611   if (format[strlen (format) -1] != '\n')
   1612     printf ("\n");
   1613 }
   1614 
   1615 void
   1616 perror_with_name (const char *name)
   1617 {
   1618   fprintf (stderr, "%s: ", program_name);
   1619   perror (name);
   1620 }
   1621 
   1622 void
   1623 pfatal_with_name (const char *name)
   1624 {
   1625   perror_with_name (name);
   1626   xexit (1);
   1627 }
   1628 
   1629 
   1630 /* Return a newly-allocated string concatenating S1 and S2.  */
   1632 
   1633 char *
   1634 concat (char *s1, char *s2)
   1635 {
   1636   int len1 = strlen (s1), len2 = strlen (s2);
   1637   char *result = (char *) xmalloc (len1 + len2 + 1);
   1638 
   1639   strcpy (result, s1);
   1640   strcpy (result + len1, s2);
   1641   *(result + len1 + len2) = 0;
   1642 
   1643   return result;
   1644 }
   1645