texindex.c revision 1.1 1 1.1 christos /* $NetBSD: texindex.c,v 1.1 2016/01/14 00:11:29 christos Exp $ */
2 1.1 christos
3 1.1 christos /* texindex -- sort TeX index dribble output into an actual index.
4 1.1 christos Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp
5 1.1 christos
6 1.1 christos Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
7 1.1 christos 2002, 2003, 2004 Free Software Foundation, Inc.
8 1.1 christos
9 1.1 christos This program is free software; you can redistribute it and/or modify
10 1.1 christos it under the terms of the GNU General Public License as published by
11 1.1 christos the Free Software Foundation; either version 2, or (at your option)
12 1.1 christos any later version.
13 1.1 christos
14 1.1 christos This program is distributed in the hope that it will be useful,
15 1.1 christos but WITHOUT ANY WARRANTY; without even the implied warranty of
16 1.1 christos MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 1.1 christos GNU General Public License for more details.
18 1.1 christos
19 1.1 christos You should have received a copy of the GNU General Public License
20 1.1 christos along with this program; if not, write to the Free Software
21 1.1 christos Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
22 1.1 christos
23 1.1 christos #include "system.h"
24 1.1 christos #include <getopt.h>
25 1.1 christos
26 1.1 christos static char *program_name = "texindex";
27 1.1 christos
28 1.1 christos #if defined (emacs)
29 1.1 christos # include "../src/config.h"
30 1.1 christos /* Some s/os.h files redefine these. */
31 1.1 christos # undef read
32 1.1 christos # undef close
33 1.1 christos # undef write
34 1.1 christos # undef open
35 1.1 christos #endif
36 1.1 christos
37 1.1 christos #if !defined (HAVE_MEMSET)
38 1.1 christos #undef memset
39 1.1 christos #define memset(ptr, ignore, count) bzero (ptr, count)
40 1.1 christos #endif
41 1.1 christos
42 1.1 christos char *mktemp (char *);
43 1.1 christos
44 1.1 christos #if !defined (SEEK_SET)
45 1.1 christos # define SEEK_SET 0
46 1.1 christos # define SEEK_CUR 1
47 1.1 christos # define SEEK_END 2
48 1.1 christos #endif /* !SEEK_SET */
49 1.1 christos
50 1.1 christos struct linebuffer;
51 1.1 christos
52 1.1 christos /* When sorting in core, this structure describes one line
53 1.1 christos and the position and length of its first keyfield. */
54 1.1 christos struct lineinfo
55 1.1 christos {
56 1.1 christos char *text; /* The actual text of the line. */
57 1.1 christos union {
58 1.1 christos char *text; /* The start of the key (for textual comparison). */
59 1.1 christos long number; /* The numeric value (for numeric comparison). */
60 1.1 christos } key;
61 1.1 christos long keylen; /* Length of KEY field. */
62 1.1 christos };
63 1.1 christos
64 1.1 christos /* This structure describes a field to use as a sort key. */
65 1.1 christos struct keyfield
66 1.1 christos {
67 1.1 christos int startwords; /* Number of words to skip. */
68 1.1 christos int startchars; /* Number of additional chars to skip. */
69 1.1 christos int endwords; /* Number of words to ignore at end. */
70 1.1 christos int endchars; /* Ditto for characters of last word. */
71 1.1 christos char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
72 1.1 christos char fold_case; /* Non-zero means case doesn't matter. */
73 1.1 christos char reverse; /* Non-zero means compare in reverse order. */
74 1.1 christos char numeric; /* Non-zeros means field is ASCII numeric. */
75 1.1 christos char positional; /* Sort according to file position. */
76 1.1 christos char braced; /* Count balanced-braced groupings as fields. */
77 1.1 christos };
78 1.1 christos
79 1.1 christos /* Vector of keyfields to use. */
80 1.1 christos struct keyfield keyfields[3];
81 1.1 christos
82 1.1 christos /* Number of keyfields stored in that vector. */
83 1.1 christos int num_keyfields = 3;
84 1.1 christos
85 1.1 christos /* Vector of input file names, terminated with a null pointer. */
86 1.1 christos char **infiles;
87 1.1 christos
88 1.1 christos /* Vector of corresponding output file names, or NULL, meaning default it
89 1.1 christos (add an `s' to the end). */
90 1.1 christos char **outfiles;
91 1.1 christos
92 1.1 christos /* Length of `infiles'. */
93 1.1 christos int num_infiles;
94 1.1 christos
95 1.1 christos /* Pointer to the array of pointers to lines being sorted. */
96 1.1 christos char **linearray;
97 1.1 christos
98 1.1 christos /* The allocated length of `linearray'. */
99 1.1 christos long nlines;
100 1.1 christos
101 1.1 christos /* Directory to use for temporary files. On Unix, it ends with a slash. */
102 1.1 christos char *tempdir;
103 1.1 christos
104 1.1 christos /* Number of last temporary file. */
105 1.1 christos int tempcount;
106 1.1 christos
107 1.1 christos /* Number of last temporary file already deleted.
108 1.1 christos Temporary files are deleted by `flush_tempfiles' in order of creation. */
109 1.1 christos int last_deleted_tempcount;
110 1.1 christos
111 1.1 christos /* During in-core sort, this points to the base of the data block
112 1.1 christos which contains all the lines of data. */
113 1.1 christos char *text_base;
114 1.1 christos
115 1.1 christos /* Initially 0; changed to 1 if we want initials in this index. */
116 1.1 christos int need_initials;
117 1.1 christos
118 1.1 christos /* Remembers the first initial letter seen in this index, so we can
119 1.1 christos determine whether we need initials in the sorted form. */
120 1.1 christos char first_initial;
121 1.1 christos
122 1.1 christos /* Additional command switches .*/
123 1.1 christos
124 1.1 christos /* Nonzero means do not delete tempfiles -- for debugging. */
125 1.1 christos int keep_tempfiles;
126 1.1 christos
127 1.1 christos /* Forward declarations of functions in this file. */
128 1.1 christos void decode_command (int argc, char **argv);
129 1.1 christos void sort_in_core (char *infile, int total, char *outfile);
130 1.1 christos void sort_offline (char *infile, off_t total, char *outfile);
131 1.1 christos char **parsefile (char *filename, char **nextline, char *data, long int size);
132 1.1 christos char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
133 1.1 christos char *find_pos (char *str, int words, int chars, int ignore_blanks);
134 1.1 christos long find_value (char *start, long int length);
135 1.1 christos char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
136 1.1 christos char *find_braced_end (char *str);
137 1.1 christos void writelines (char **linearray, int nlines, FILE *ostream);
138 1.1 christos int compare_field (struct keyfield *keyfield, char *start1,
139 1.1 christos long int length1, long int pos1, char *start2,
140 1.1 christos long int length2, long int pos2);
141 1.1 christos int compare_full (const void *, const void *);
142 1.1 christos long readline (struct linebuffer *linebuffer, FILE *stream);
143 1.1 christos int merge_files (char **infiles, int nfiles, char *outfile);
144 1.1 christos int merge_direct (char **infiles, int nfiles, char *outfile);
145 1.1 christos void pfatal_with_name (const char *name);
146 1.1 christos void fatal (const char *format, const char *arg);
147 1.1 christos void error (const char *format, const char *arg);
148 1.1 christos void *xmalloc (), *xrealloc ();
149 1.1 christos char *concat (char *s1, char *s2);
150 1.1 christos void flush_tempfiles (int to_count);
151 1.1 christos
152 1.1 christos #define MAX_IN_CORE_SORT 500000
154 1.1 christos
155 1.1 christos int
156 1.1 christos main (int argc, char **argv)
157 1.1 christos {
158 1.1 christos int i;
159 1.1 christos
160 1.1 christos tempcount = 0;
161 1.1 christos last_deleted_tempcount = 0;
162 1.1 christos
163 1.1 christos #ifdef HAVE_SETLOCALE
164 1.1 christos /* Set locale via LC_ALL. */
165 1.1 christos setlocale (LC_ALL, "");
166 1.1 christos #endif
167 1.1 christos
168 1.1 christos /* Set the text message domain. */
169 1.1 christos bindtextdomain (PACKAGE, LOCALEDIR);
170 1.1 christos textdomain (PACKAGE);
171 1.1 christos
172 1.1 christos /* In case we write to a redirected stdout that fails. */
173 1.1 christos /* not ready atexit (close_stdout); */
174 1.1 christos
175 1.1 christos /* Describe the kind of sorting to do. */
176 1.1 christos /* The first keyfield uses the first braced field and folds case. */
177 1.1 christos keyfields[0].braced = 1;
178 1.1 christos keyfields[0].fold_case = 1;
179 1.1 christos keyfields[0].endwords = -1;
180 1.1 christos keyfields[0].endchars = -1;
181 1.1 christos
182 1.1 christos /* The second keyfield uses the second braced field, numerically. */
183 1.1 christos keyfields[1].braced = 1;
184 1.1 christos keyfields[1].numeric = 1;
185 1.1 christos keyfields[1].startwords = 1;
186 1.1 christos keyfields[1].endwords = -1;
187 1.1 christos keyfields[1].endchars = -1;
188 1.1 christos
189 1.1 christos /* The third keyfield (which is ignored while discarding duplicates)
190 1.1 christos compares the whole line. */
191 1.1 christos keyfields[2].endwords = -1;
192 1.1 christos keyfields[2].endchars = -1;
193 1.1 christos
194 1.1 christos decode_command (argc, argv);
195 1.1 christos
196 1.1 christos /* Process input files completely, one by one. */
197 1.1 christos
198 1.1 christos for (i = 0; i < num_infiles; i++)
199 1.1 christos {
200 1.1 christos int desc;
201 1.1 christos off_t ptr;
202 1.1 christos char *outfile;
203 1.1 christos struct stat instat;
204 1.1 christos
205 1.1 christos desc = open (infiles[i], O_RDONLY, 0);
206 1.1 christos if (desc < 0)
207 1.1 christos pfatal_with_name (infiles[i]);
208 1.1 christos
209 1.1 christos if (stat (infiles[i], &instat))
210 1.1 christos pfatal_with_name (infiles[i]);
211 1.1 christos if (S_ISDIR (instat.st_mode))
212 1.1 christos {
213 1.1 christos #ifdef EISDIR
214 1.1 christos errno = EISDIR;
215 1.1 christos #endif
216 1.1 christos pfatal_with_name (infiles[i]);
217 1.1 christos }
218 1.1 christos
219 1.1 christos lseek (desc, (off_t) 0, SEEK_END);
220 1.1 christos ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
221 1.1 christos
222 1.1 christos close (desc);
223 1.1 christos
224 1.1 christos outfile = outfiles[i];
225 1.1 christos if (!outfile)
226 1.1 christos outfile = concat (infiles[i], "s");
227 1.1 christos
228 1.1 christos need_initials = 0;
229 1.1 christos first_initial = '\0';
230 1.1 christos
231 1.1 christos if (ptr < MAX_IN_CORE_SORT)
232 1.1 christos /* Sort a small amount of data. */
233 1.1 christos sort_in_core (infiles[i], (int)ptr, outfile);
234 1.1 christos else
235 1.1 christos sort_offline (infiles[i], ptr, outfile);
236 1.1 christos }
237 1.1 christos
238 1.1 christos flush_tempfiles (tempcount);
239 1.1 christos xexit (0);
240 1.1 christos return 0; /* Avoid bogus warnings. */
241 1.1 christos }
242 1.1 christos
243 1.1 christos typedef struct
245 1.1 christos {
246 1.1 christos char *long_name;
247 1.1 christos char *short_name;
248 1.1 christos int *variable_ref;
249 1.1 christos int variable_value;
250 1.1 christos char *arg_name;
251 1.1 christos char *doc_string;
252 1.1 christos } TEXINDEX_OPTION;
253 1.1 christos
254 1.1 christos TEXINDEX_OPTION texindex_options[] = {
255 1.1 christos { "--help", "-h", (int *)NULL, 0, (char *)NULL,
256 1.1 christos N_("display this help and exit") },
257 1.1 christos { "--keep", "-k", &keep_tempfiles, 1, (char *)NULL,
258 1.1 christos N_("keep temporary files around after processing") },
259 1.1 christos { "--no-keep", 0, &keep_tempfiles, 0, (char *)NULL,
260 1.1 christos N_("do not keep temporary files around after processing (default)") },
261 1.1 christos { "--output", "-o", (int *)NULL, 0, "FILE",
262 1.1 christos N_("send output to FILE") },
263 1.1 christos { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
264 1.1 christos N_("display version information and exit") },
265 1.1 christos { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
266 1.1 christos };
267 1.1 christos
268 1.1 christos void
269 1.1 christos usage (int result_value)
270 1.1 christos {
271 1.1 christos register int i;
272 1.1 christos FILE *f = result_value ? stderr : stdout;
273 1.1 christos
274 1.1 christos fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
275 1.1 christos fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
276 1.1 christos /* Avoid trigraph nonsense. */
277 1.1 christos fprintf (f,
278 1.1 christos _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
279 1.1 christos '?', '?'); /* avoid trigraph in cat-id-tbl.c */
280 1.1 christos fprintf (f, _("\nOptions:\n"));
281 1.1 christos
282 1.1 christos for (i = 0; texindex_options[i].long_name; i++)
283 1.1 christos {
284 1.1 christos putc (' ', f);
285 1.1 christos
286 1.1 christos if (texindex_options[i].short_name)
287 1.1 christos fprintf (f, "%s, ", texindex_options[i].short_name);
288 1.1 christos
289 1.1 christos fprintf (f, "%s %s",
290 1.1 christos texindex_options[i].long_name,
291 1.1 christos texindex_options[i].arg_name
292 1.1 christos ? texindex_options[i].arg_name : "");
293 1.1 christos
294 1.1 christos fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
295 1.1 christos }
296 1.1 christos fputs (_("\n\
297 1.1 christos Email bug reports to bug-texinfo (at) gnu.org,\n\
298 1.1 christos general questions and discussion to help-texinfo (at) gnu.org.\n\
299 1.1 christos Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
300 1.1 christos fputs ("\n", f);
301 1.1 christos
302 1.1 christos xexit (result_value);
303 1.1 christos }
304 1.1 christos
305 1.1 christos /* Decode the command line arguments to set the parameter variables
306 1.1 christos and set up the vector of keyfields and the vector of input files. */
307 1.1 christos
308 1.1 christos void
309 1.1 christos decode_command (int argc, char **argv)
310 1.1 christos {
311 1.1 christos int arg_index = 1;
312 1.1 christos char **ip;
313 1.1 christos char **op;
314 1.1 christos
315 1.1 christos /* Store default values into parameter variables. */
316 1.1 christos
317 1.1 christos tempdir = getenv ("TMPDIR");
318 1.1 christos if (tempdir == NULL)
319 1.1 christos tempdir = getenv ("TEMP");
320 1.1 christos if (tempdir == NULL)
321 1.1 christos tempdir = getenv ("TMP");
322 1.1 christos if (tempdir == NULL)
323 1.1 christos tempdir = DEFAULT_TMPDIR;
324 1.1 christos else
325 1.1 christos tempdir = concat (tempdir, "/");
326 1.1 christos
327 1.1 christos keep_tempfiles = 0;
328 1.1 christos
329 1.1 christos /* Allocate ARGC input files, which must be enough. */
330 1.1 christos
331 1.1 christos infiles = (char **) xmalloc (argc * sizeof (char *));
332 1.1 christos outfiles = (char **) xmalloc (argc * sizeof (char *));
333 1.1 christos ip = infiles;
334 1.1 christos op = outfiles;
335 1.1 christos
336 1.1 christos while (arg_index < argc)
337 1.1 christos {
338 1.1 christos char *arg = argv[arg_index++];
339 1.1 christos
340 1.1 christos if (*arg == '-')
341 1.1 christos {
342 1.1 christos if (strcmp (arg, "--version") == 0)
343 1.1 christos {
344 1.1 christos printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
345 1.1 christos puts ("");
346 1.1 christos puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
347 1.1 christos printf (_("There is NO warranty. You may redistribute this software\n\
348 1.1 christos under the terms of the GNU General Public License.\n\
349 1.1 christos For more information about these matters, see the files named COPYING.\n"));
350 1.1 christos xexit (0);
351 1.1 christos }
352 1.1 christos else if ((strcmp (arg, "--keep") == 0) ||
353 1.1 christos (strcmp (arg, "-k") == 0))
354 1.1 christos {
355 1.1 christos keep_tempfiles = 1;
356 1.1 christos }
357 1.1 christos else if ((strcmp (arg, "--help") == 0) ||
358 1.1 christos (strcmp (arg, "-h") == 0))
359 1.1 christos {
360 1.1 christos usage (0);
361 1.1 christos }
362 1.1 christos else if ((strcmp (arg, "--output") == 0) ||
363 1.1 christos (strcmp (arg, "-o") == 0))
364 1.1 christos {
365 1.1 christos if (argv[arg_index] != (char *)NULL)
366 1.1 christos {
367 1.1 christos arg_index++;
368 1.1 christos if (op > outfiles)
369 1.1 christos *(op - 1) = argv[arg_index];
370 1.1 christos }
371 1.1 christos else
372 1.1 christos usage (1);
373 1.1 christos }
374 1.1 christos else
375 1.1 christos usage (1);
376 1.1 christos }
377 1.1 christos else
378 1.1 christos {
379 1.1 christos *ip++ = arg;
380 1.1 christos *op++ = (char *)NULL;
381 1.1 christos }
382 1.1 christos }
383 1.1 christos
384 1.1 christos /* Record number of keyfields and terminate list of filenames. */
385 1.1 christos num_infiles = ip - infiles;
386 1.1 christos *ip = (char *)NULL;
387 1.1 christos if (num_infiles == 0)
388 1.1 christos usage (1);
389 1.1 christos }
390 1.1 christos
391 1.1 christos /* Return a name for temporary file COUNT. */
393 1.1 christos
394 1.1 christos static char *
395 1.1 christos maketempname (int count)
396 1.1 christos {
397 1.1 christos static char *tempbase = NULL;
398 1.1 christos char tempsuffix[10];
399 1.1 christos
400 1.1 christos if (!tempbase)
401 1.1 christos {
402 1.1 christos int fd;
403 1.1 christos tempbase = concat (tempdir, "txidxXXXXXX");
404 1.1 christos
405 1.1 christos fd = mkstemp (tempbase);
406 1.1 christos if (fd == -1)
407 1.1 christos pfatal_with_name (tempbase);
408 1.1 christos }
409 1.1 christos
410 1.1 christos sprintf (tempsuffix, ".%d", count);
411 1.1 christos return concat (tempbase, tempsuffix);
412 1.1 christos }
413 1.1 christos
414 1.1 christos
415 1.1 christos /* Delete all temporary files up to TO_COUNT. */
416 1.1 christos
417 1.1 christos void
418 1.1 christos flush_tempfiles (int to_count)
419 1.1 christos {
420 1.1 christos if (keep_tempfiles)
421 1.1 christos return;
422 1.1 christos while (last_deleted_tempcount < to_count)
423 1.1 christos unlink (maketempname (++last_deleted_tempcount));
424 1.1 christos }
425 1.1 christos
426 1.1 christos
427 1.1 christos /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
429 1.1 christos
430 1.1 christos int
431 1.1 christos compare_full (const void *p1, const void *p2)
432 1.1 christos {
433 1.1 christos char **line1 = (char **) p1;
434 1.1 christos char **line2 = (char **) p2;
435 1.1 christos int i;
436 1.1 christos
437 1.1 christos /* Compare using the first keyfield;
438 1.1 christos if that does not distinguish the lines, try the second keyfield;
439 1.1 christos and so on. */
440 1.1 christos
441 1.1 christos for (i = 0; i < num_keyfields; i++)
442 1.1 christos {
443 1.1 christos long length1, length2;
444 1.1 christos char *start1 = find_field (&keyfields[i], *line1, &length1);
445 1.1 christos char *start2 = find_field (&keyfields[i], *line2, &length2);
446 1.1 christos int tem = compare_field (&keyfields[i], start1, length1,
447 1.1 christos *line1 - text_base,
448 1.1 christos start2, length2, *line2 - text_base);
449 1.1 christos if (tem)
450 1.1 christos {
451 1.1 christos if (keyfields[i].reverse)
452 1.1 christos return -tem;
453 1.1 christos return tem;
454 1.1 christos }
455 1.1 christos }
456 1.1 christos
457 1.1 christos return 0; /* Lines match exactly. */
458 1.1 christos }
459 1.1 christos
460 1.1 christos /* Compare LINE1 and LINE2, described by structures
461 1.1 christos in which the first keyfield is identified in advance.
462 1.1 christos For positional sorting, assumes that the order of the lines in core
463 1.1 christos reflects their nominal order. */
464 1.1 christos int
465 1.1 christos compare_prepared (const void *p1, const void *p2)
466 1.1 christos {
467 1.1 christos struct lineinfo *line1 = (struct lineinfo *) p1;
468 1.1 christos struct lineinfo *line2 = (struct lineinfo *) p2;
469 1.1 christos int i;
470 1.1 christos int tem;
471 1.1 christos char *text1, *text2;
472 1.1 christos
473 1.1 christos /* Compare using the first keyfield, which has been found for us already. */
474 1.1 christos if (keyfields->positional)
475 1.1 christos {
476 1.1 christos if (line1->text - text_base > line2->text - text_base)
477 1.1 christos tem = 1;
478 1.1 christos else
479 1.1 christos tem = -1;
480 1.1 christos }
481 1.1 christos else if (keyfields->numeric)
482 1.1 christos tem = line1->key.number - line2->key.number;
483 1.1 christos else
484 1.1 christos tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
485 1.1 christos line2->key.text, line2->keylen, 0);
486 1.1 christos if (tem)
487 1.1 christos {
488 1.1 christos if (keyfields->reverse)
489 1.1 christos return -tem;
490 1.1 christos return tem;
491 1.1 christos }
492 1.1 christos
493 1.1 christos text1 = line1->text;
494 1.1 christos text2 = line2->text;
495 1.1 christos
496 1.1 christos /* Compare using the second keyfield;
497 1.1 christos if that does not distinguish the lines, try the third keyfield;
498 1.1 christos and so on. */
499 1.1 christos
500 1.1 christos for (i = 1; i < num_keyfields; i++)
501 1.1 christos {
502 1.1 christos long length1, length2;
503 1.1 christos char *start1 = find_field (&keyfields[i], text1, &length1);
504 1.1 christos char *start2 = find_field (&keyfields[i], text2, &length2);
505 1.1 christos int tem = compare_field (&keyfields[i], start1, length1,
506 1.1 christos text1 - text_base,
507 1.1 christos start2, length2, text2 - text_base);
508 1.1 christos if (tem)
509 1.1 christos {
510 1.1 christos if (keyfields[i].reverse)
511 1.1 christos return -tem;
512 1.1 christos return tem;
513 1.1 christos }
514 1.1 christos }
515 1.1 christos
516 1.1 christos return 0; /* Lines match exactly. */
517 1.1 christos }
518 1.1 christos
519 1.1 christos /* Like compare_full but more general.
520 1.1 christos You can pass any strings, and you can say how many keyfields to use.
521 1.1 christos POS1 and POS2 should indicate the nominal positional ordering of
522 1.1 christos the two lines in the input. */
523 1.1 christos
524 1.1 christos int
525 1.1 christos compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
526 1.1 christos {
527 1.1 christos int i;
528 1.1 christos
529 1.1 christos /* Compare using the first keyfield;
530 1.1 christos if that does not distinguish the lines, try the second keyfield;
531 1.1 christos and so on. */
532 1.1 christos
533 1.1 christos for (i = 0; i < use_keyfields; i++)
534 1.1 christos {
535 1.1 christos long length1, length2;
536 1.1 christos char *start1 = find_field (&keyfields[i], str1, &length1);
537 1.1 christos char *start2 = find_field (&keyfields[i], str2, &length2);
538 1.1 christos int tem = compare_field (&keyfields[i], start1, length1, pos1,
539 1.1 christos start2, length2, pos2);
540 1.1 christos if (tem)
541 1.1 christos {
542 1.1 christos if (keyfields[i].reverse)
543 1.1 christos return -tem;
544 1.1 christos return tem;
545 1.1 christos }
546 1.1 christos }
547 1.1 christos
548 1.1 christos return 0; /* Lines match exactly. */
549 1.1 christos }
550 1.1 christos
551 1.1 christos /* Find the start and length of a field in STR according to KEYFIELD.
552 1.1 christos A pointer to the starting character is returned, and the length
553 1.1 christos is stored into the int that LENGTHPTR points to. */
554 1.1 christos
555 1.1 christos char *
556 1.1 christos find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
557 1.1 christos {
558 1.1 christos char *start;
559 1.1 christos char *end;
560 1.1 christos char *(*fun) ();
561 1.1 christos
562 1.1 christos if (keyfield->braced)
563 1.1 christos fun = find_braced_pos;
564 1.1 christos else
565 1.1 christos fun = find_pos;
566 1.1 christos
567 1.1 christos start = (*fun) (str, keyfield->startwords, keyfield->startchars,
568 1.1 christos keyfield->ignore_blanks);
569 1.1 christos if (keyfield->endwords < 0)
570 1.1 christos {
571 1.1 christos if (keyfield->braced)
572 1.1 christos end = find_braced_end (start);
573 1.1 christos else
574 1.1 christos {
575 1.1 christos end = start;
576 1.1 christos while (*end && *end != '\n')
577 1.1 christos end++;
578 1.1 christos }
579 1.1 christos }
580 1.1 christos else
581 1.1 christos {
582 1.1 christos end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
583 1.1 christos if (end - str < start - str)
584 1.1 christos end = start;
585 1.1 christos }
586 1.1 christos *lengthptr = end - start;
587 1.1 christos return start;
588 1.1 christos }
589 1.1 christos
590 1.1 christos /* Return a pointer to a specified place within STR,
591 1.1 christos skipping (from the beginning) WORDS words and then CHARS chars.
592 1.1 christos If IGNORE_BLANKS is nonzero, we skip all blanks
593 1.1 christos after finding the specified word. */
594 1.1 christos
595 1.1 christos char *
596 1.1 christos find_pos (char *str, int words, int chars, int ignore_blanks)
597 1.1 christos {
598 1.1 christos int i;
599 1.1 christos char *p = str;
600 1.1 christos
601 1.1 christos for (i = 0; i < words; i++)
602 1.1 christos {
603 1.1 christos char c;
604 1.1 christos /* Find next bunch of nonblanks and skip them. */
605 1.1 christos while ((c = *p) == ' ' || c == '\t')
606 1.1 christos p++;
607 1.1 christos while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
608 1.1 christos p++;
609 1.1 christos if (!*p || *p == '\n')
610 1.1 christos return p;
611 1.1 christos }
612 1.1 christos
613 1.1 christos while (*p == ' ' || *p == '\t')
614 1.1 christos p++;
615 1.1 christos
616 1.1 christos for (i = 0; i < chars; i++)
617 1.1 christos {
618 1.1 christos if (!*p || *p == '\n')
619 1.1 christos break;
620 1.1 christos p++;
621 1.1 christos }
622 1.1 christos return p;
623 1.1 christos }
624 1.1 christos
625 1.1 christos /* Like find_pos but assumes that each field is surrounded by braces
626 1.1 christos and that braces within fields are balanced. */
627 1.1 christos
628 1.1 christos char *
629 1.1 christos find_braced_pos (char *str, int words, int chars, int ignore_blanks)
630 1.1 christos {
631 1.1 christos int i;
632 1.1 christos int bracelevel;
633 1.1 christos char *p = str;
634 1.1 christos char c;
635 1.1 christos
636 1.1 christos for (i = 0; i < words; i++)
637 1.1 christos {
638 1.1 christos bracelevel = 1;
639 1.1 christos while ((c = *p++) != '{' && c != '\n' && c)
640 1.1 christos /* Do nothing. */ ;
641 1.1 christos if (c != '{')
642 1.1 christos return p - 1;
643 1.1 christos while (bracelevel)
644 1.1 christos {
645 1.1 christos c = *p++;
646 1.1 christos if (c == '{')
647 1.1 christos bracelevel++;
648 1.1 christos if (c == '}')
649 1.1 christos bracelevel--;
650 1.1 christos if (c == 0 || c == '\n')
651 1.1 christos return p - 1;
652 1.1 christos }
653 1.1 christos }
654 1.1 christos
655 1.1 christos while ((c = *p++) != '{' && c != '\n' && c)
656 1.1 christos /* Do nothing. */ ;
657 1.1 christos
658 1.1 christos if (c != '{')
659 1.1 christos return p - 1;
660 1.1 christos
661 1.1 christos if (ignore_blanks)
662 1.1 christos while ((c = *p) == ' ' || c == '\t')
663 1.1 christos p++;
664 1.1 christos
665 1.1 christos for (i = 0; i < chars; i++)
666 1.1 christos {
667 1.1 christos if (!*p || *p == '\n')
668 1.1 christos break;
669 1.1 christos p++;
670 1.1 christos }
671 1.1 christos return p;
672 1.1 christos }
673 1.1 christos
674 1.1 christos /* Find the end of the balanced-brace field which starts at STR.
675 1.1 christos The position returned is just before the closing brace. */
676 1.1 christos
677 1.1 christos char *
678 1.1 christos find_braced_end (char *str)
679 1.1 christos {
680 1.1 christos int bracelevel;
681 1.1 christos char *p = str;
682 1.1 christos char c;
683 1.1 christos
684 1.1 christos bracelevel = 1;
685 1.1 christos while (bracelevel)
686 1.1 christos {
687 1.1 christos c = *p++;
688 1.1 christos if (c == '{')
689 1.1 christos bracelevel++;
690 1.1 christos if (c == '}')
691 1.1 christos bracelevel--;
692 1.1 christos if (c == 0 || c == '\n')
693 1.1 christos return p - 1;
694 1.1 christos }
695 1.1 christos return p - 1;
696 1.1 christos }
697 1.1 christos
698 1.1 christos long
699 1.1 christos find_value (char *start, long int length)
700 1.1 christos {
701 1.1 christos while (length != 0L)
702 1.1 christos {
703 1.1 christos if (isdigit (*start))
704 1.1 christos return atol (start);
705 1.1 christos length--;
706 1.1 christos start++;
707 1.1 christos }
708 1.1 christos return 0l;
709 1.1 christos }
710 1.1 christos
711 1.1 christos /* Vector used to translate characters for comparison.
712 1.1 christos This is how we make all alphanumerics follow all else,
713 1.1 christos and ignore case in the first sorting. */
714 1.1 christos int char_order[256];
715 1.1 christos
716 1.1 christos void
717 1.1 christos init_char_order (void)
718 1.1 christos {
719 1.1 christos int i;
720 1.1 christos for (i = 1; i < 256; i++)
721 1.1 christos char_order[i] = i;
722 1.1 christos
723 1.1 christos for (i = '0'; i <= '9'; i++)
724 1.1 christos char_order[i] += 512;
725 1.1 christos
726 1.1 christos for (i = 'a'; i <= 'z'; i++)
727 1.1 christos {
728 1.1 christos char_order[i] = 512 + i;
729 1.1 christos char_order[i + 'A' - 'a'] = 512 + i;
730 1.1 christos }
731 1.1 christos }
732 1.1 christos
733 1.1 christos /* Compare two fields (each specified as a start pointer and a character count)
734 1.1 christos according to KEYFIELD.
735 1.1 christos The sign of the value reports the relation between the fields. */
736 1.1 christos
737 1.1 christos int
738 1.1 christos compare_field (struct keyfield *keyfield, char *start1, long int length1,
739 1.1 christos long int pos1, char *start2, long int length2, long int pos2)
740 1.1 christos {
741 1.1 christos if (keyfields->positional)
742 1.1 christos {
743 1.1 christos if (pos1 > pos2)
744 1.1 christos return 1;
745 1.1 christos else
746 1.1 christos return -1;
747 1.1 christos }
748 1.1 christos if (keyfield->numeric)
749 1.1 christos {
750 1.1 christos long value = find_value (start1, length1) - find_value (start2, length2);
751 1.1 christos if (value > 0)
752 1.1 christos return 1;
753 1.1 christos if (value < 0)
754 1.1 christos return -1;
755 1.1 christos return 0;
756 1.1 christos }
757 1.1 christos else
758 1.1 christos {
759 1.1 christos char *p1 = start1;
760 1.1 christos char *p2 = start2;
761 1.1 christos char *e1 = start1 + length1;
762 1.1 christos char *e2 = start2 + length2;
763 1.1 christos
764 1.1 christos while (1)
765 1.1 christos {
766 1.1 christos int c1, c2;
767 1.1 christos
768 1.1 christos if (p1 == e1)
769 1.1 christos c1 = 0;
770 1.1 christos else
771 1.1 christos c1 = *p1++;
772 1.1 christos if (p2 == e2)
773 1.1 christos c2 = 0;
774 1.1 christos else
775 1.1 christos c2 = *p2++;
776 1.1 christos
777 1.1 christos if (char_order[c1] != char_order[c2])
778 1.1 christos return char_order[c1] - char_order[c2];
779 1.1 christos if (!c1)
780 1.1 christos break;
781 1.1 christos }
782 1.1 christos
783 1.1 christos /* Strings are equal except possibly for case. */
784 1.1 christos p1 = start1;
785 1.1 christos p2 = start2;
786 1.1 christos while (1)
787 1.1 christos {
788 1.1 christos int c1, c2;
789 1.1 christos
790 1.1 christos if (p1 == e1)
791 1.1 christos c1 = 0;
792 1.1 christos else
793 1.1 christos c1 = *p1++;
794 1.1 christos if (p2 == e2)
795 1.1 christos c2 = 0;
796 1.1 christos else
797 1.1 christos c2 = *p2++;
798 1.1 christos
799 1.1 christos if (c1 != c2)
800 1.1 christos /* Reverse sign here so upper case comes out last. */
801 1.1 christos return c2 - c1;
802 1.1 christos if (!c1)
803 1.1 christos break;
804 1.1 christos }
805 1.1 christos
806 1.1 christos return 0;
807 1.1 christos }
808 1.1 christos }
809 1.1 christos
810 1.1 christos /* A `struct linebuffer' is a structure which holds a line of text.
812 1.1 christos `readline' reads a line from a stream into a linebuffer
813 1.1 christos and works regardless of the length of the line. */
814 1.1 christos
815 1.1 christos struct linebuffer
816 1.1 christos {
817 1.1 christos long size;
818 1.1 christos char *buffer;
819 1.1 christos };
820 1.1 christos
821 1.1 christos /* Initialize LINEBUFFER for use. */
822 1.1 christos
823 1.1 christos void
824 1.1 christos initbuffer (struct linebuffer *linebuffer)
825 1.1 christos {
826 1.1 christos linebuffer->size = 200;
827 1.1 christos linebuffer->buffer = (char *) xmalloc (200);
828 1.1 christos }
829 1.1 christos
830 1.1 christos /* Read a line of text from STREAM into LINEBUFFER.
831 1.1 christos Return the length of the line. */
832 1.1 christos
833 1.1 christos long
834 1.1 christos readline (struct linebuffer *linebuffer, FILE *stream)
835 1.1 christos {
836 1.1 christos char *buffer = linebuffer->buffer;
837 1.1 christos char *p = linebuffer->buffer;
838 1.1 christos char *end = p + linebuffer->size;
839 1.1 christos
840 1.1 christos while (1)
841 1.1 christos {
842 1.1 christos int c = getc (stream);
843 1.1 christos if (p == end)
844 1.1 christos {
845 1.1 christos buffer = (char *) xrealloc (buffer, linebuffer->size *= 2);
846 1.1 christos p += buffer - linebuffer->buffer;
847 1.1 christos end += buffer - linebuffer->buffer;
848 1.1 christos linebuffer->buffer = buffer;
849 1.1 christos }
850 1.1 christos if (c < 0 || c == '\n')
851 1.1 christos {
852 1.1 christos *p = 0;
853 1.1 christos break;
854 1.1 christos }
855 1.1 christos *p++ = c;
856 1.1 christos }
857 1.1 christos
858 1.1 christos return p - buffer;
859 1.1 christos }
860 1.1 christos
861 1.1 christos /* Sort an input file too big to sort in core. */
863 1.1 christos
864 1.1 christos void
865 1.1 christos sort_offline (char *infile, off_t total, char *outfile)
866 1.1 christos {
867 1.1 christos /* More than enough. */
868 1.1 christos int ntemps = 2 * (total + MAX_IN_CORE_SORT - 1) / MAX_IN_CORE_SORT;
869 1.1 christos char **tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
870 1.1 christos FILE *istream = fopen (infile, "r");
871 1.1 christos int i;
872 1.1 christos struct linebuffer lb;
873 1.1 christos long linelength;
874 1.1 christos int failure = 0;
875 1.1 christos
876 1.1 christos initbuffer (&lb);
877 1.1 christos
878 1.1 christos /* Read in one line of input data. */
879 1.1 christos
880 1.1 christos linelength = readline (&lb, istream);
881 1.1 christos
882 1.1 christos if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
883 1.1 christos {
884 1.1 christos error (_("%s: not a texinfo index file"), infile);
885 1.1 christos return;
886 1.1 christos }
887 1.1 christos
888 1.1 christos /* Split up the input into `ntemps' temporary files, or maybe fewer,
889 1.1 christos and put the new files' names into `tempfiles' */
890 1.1 christos
891 1.1 christos for (i = 0; i < ntemps; i++)
892 1.1 christos {
893 1.1 christos char *outname = maketempname (++tempcount);
894 1.1 christos FILE *ostream = fopen (outname, "w");
895 1.1 christos long tempsize = 0;
896 1.1 christos
897 1.1 christos if (!ostream)
898 1.1 christos pfatal_with_name (outname);
899 1.1 christos tempfiles[i] = outname;
900 1.1 christos
901 1.1 christos /* Copy lines into this temp file as long as it does not make file
902 1.1 christos "too big" or until there are no more lines. */
903 1.1 christos
904 1.1 christos while (tempsize + linelength + 1 <= MAX_IN_CORE_SORT)
905 1.1 christos {
906 1.1 christos tempsize += linelength + 1;
907 1.1 christos fputs (lb.buffer, ostream);
908 1.1 christos putc ('\n', ostream);
909 1.1 christos
910 1.1 christos /* Read another line of input data. */
911 1.1 christos
912 1.1 christos linelength = readline (&lb, istream);
913 1.1 christos if (!linelength && feof (istream))
914 1.1 christos break;
915 1.1 christos
916 1.1 christos if (lb.buffer[0] != '\\' && lb.buffer[0] != '@')
917 1.1 christos {
918 1.1 christos error (_("%s: not a texinfo index file"), infile);
919 1.1 christos failure = 1;
920 1.1 christos goto fail;
921 1.1 christos }
922 1.1 christos }
923 1.1 christos fclose (ostream);
924 1.1 christos if (feof (istream))
925 1.1 christos break;
926 1.1 christos }
927 1.1 christos
928 1.1 christos free (lb.buffer);
929 1.1 christos
930 1.1 christos fail:
931 1.1 christos /* Record number of temp files we actually needed. */
932 1.1 christos
933 1.1 christos ntemps = i;
934 1.1 christos
935 1.1 christos /* Sort each tempfile into another tempfile.
936 1.1 christos Delete the first set of tempfiles and put the names of the second
937 1.1 christos into `tempfiles'. */
938 1.1 christos
939 1.1 christos for (i = 0; i < ntemps; i++)
940 1.1 christos {
941 1.1 christos char *newtemp = maketempname (++tempcount);
942 1.1 christos sort_in_core (tempfiles[i], MAX_IN_CORE_SORT, newtemp);
943 1.1 christos if (!keep_tempfiles)
944 1.1 christos unlink (tempfiles[i]);
945 1.1 christos tempfiles[i] = newtemp;
946 1.1 christos }
947 1.1 christos
948 1.1 christos if (failure)
949 1.1 christos return;
950 1.1 christos
951 1.1 christos /* Merge the tempfiles together and indexify. */
952 1.1 christos
953 1.1 christos merge_files (tempfiles, ntemps, outfile);
954 1.1 christos }
955 1.1 christos
956 1.1 christos /* Sort INFILE, whose size is TOTAL,
958 1.1 christos assuming that is small enough to be done in-core,
959 1.1 christos then indexify it and send the output to OUTFILE (or to stdout). */
960 1.1 christos
961 1.1 christos void
962 1.1 christos sort_in_core (char *infile, int total, char *outfile)
963 1.1 christos {
964 1.1 christos char **nextline;
965 1.1 christos char *data = (char *) xmalloc (total + 1);
966 1.1 christos char *file_data;
967 1.1 christos long file_size;
968 1.1 christos int i;
969 1.1 christos FILE *ostream = stdout;
970 1.1 christos struct lineinfo *lineinfo;
971 1.1 christos
972 1.1 christos /* Read the contents of the file into the moby array `data'. */
973 1.1 christos
974 1.1 christos int desc = open (infile, O_RDONLY, 0);
975 1.1 christos
976 1.1 christos if (desc < 0)
977 1.1 christos fatal (_("failure reopening %s"), infile);
978 1.1 christos for (file_size = 0;;)
979 1.1 christos {
980 1.1 christos i = read (desc, data + file_size, total - file_size);
981 1.1 christos if (i <= 0)
982 1.1 christos break;
983 1.1 christos file_size += i;
984 1.1 christos }
985 1.1 christos file_data = data;
986 1.1 christos data[file_size] = 0;
987 1.1 christos
988 1.1 christos close (desc);
989 1.1 christos
990 1.1 christos if (file_size > 0 && data[0] != '\\' && data[0] != '@')
991 1.1 christos {
992 1.1 christos error (_("%s: not a texinfo index file"), infile);
993 1.1 christos return;
994 1.1 christos }
995 1.1 christos
996 1.1 christos init_char_order ();
997 1.1 christos
998 1.1 christos /* Sort routines want to know this address. */
999 1.1 christos
1000 1.1 christos text_base = data;
1001 1.1 christos
1002 1.1 christos /* Create the array of pointers to lines, with a default size
1003 1.1 christos frequently enough. */
1004 1.1 christos
1005 1.1 christos nlines = total / 50;
1006 1.1 christos if (!nlines)
1007 1.1 christos nlines = 2;
1008 1.1 christos linearray = (char **) xmalloc (nlines * sizeof (char *));
1009 1.1 christos
1010 1.1 christos /* `nextline' points to the next free slot in this array.
1011 1.1 christos `nlines' is the allocated size. */
1012 1.1 christos
1013 1.1 christos nextline = linearray;
1014 1.1 christos
1015 1.1 christos /* Parse the input file's data, and make entries for the lines. */
1016 1.1 christos
1017 1.1 christos nextline = parsefile (infile, nextline, file_data, file_size);
1018 1.1 christos if (nextline == 0)
1019 1.1 christos {
1020 1.1 christos error (_("%s: not a texinfo index file"), infile);
1021 1.1 christos return;
1022 1.1 christos }
1023 1.1 christos
1024 1.1 christos /* Sort the lines. */
1025 1.1 christos
1026 1.1 christos /* If we have enough space, find the first keyfield of each line in advance.
1027 1.1 christos Make a `struct lineinfo' for each line, which records the keyfield
1028 1.1 christos as well as the line, and sort them. */
1029 1.1 christos
1030 1.1 christos lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
1031 1.1 christos
1032 1.1 christos if (lineinfo)
1033 1.1 christos {
1034 1.1 christos struct lineinfo *lp;
1035 1.1 christos char **p;
1036 1.1 christos
1037 1.1 christos for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1038 1.1 christos {
1039 1.1 christos lp->text = *p;
1040 1.1 christos lp->key.text = find_field (keyfields, *p, &lp->keylen);
1041 1.1 christos if (keyfields->numeric)
1042 1.1 christos lp->key.number = find_value (lp->key.text, lp->keylen);
1043 1.1 christos }
1044 1.1 christos
1045 1.1 christos qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
1046 1.1 christos compare_prepared);
1047 1.1 christos
1048 1.1 christos for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
1049 1.1 christos *p = lp->text;
1050 1.1 christos
1051 1.1 christos free (lineinfo);
1052 1.1 christos }
1053 1.1 christos else
1054 1.1 christos qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
1055 1.1 christos
1056 1.1 christos /* Open the output file. */
1057 1.1 christos
1058 1.1 christos if (outfile)
1059 1.1 christos {
1060 1.1 christos ostream = fopen (outfile, "w");
1061 1.1 christos if (!ostream)
1062 1.1 christos pfatal_with_name (outfile);
1063 1.1 christos }
1064 1.1 christos
1065 1.1 christos writelines (linearray, nextline - linearray, ostream);
1066 1.1 christos if (outfile)
1067 1.1 christos fclose (ostream);
1068 1.1 christos
1069 1.1 christos free (linearray);
1070 1.1 christos free (data);
1071 1.1 christos }
1072 1.1 christos
1073 1.1 christos /* Parse an input string in core into lines.
1075 1.1 christos DATA is the input string, and SIZE is its length.
1076 1.1 christos Data goes in LINEARRAY starting at NEXTLINE.
1077 1.1 christos The value returned is the first entry in LINEARRAY still unused.
1078 1.1 christos Value 0 means input file contents are invalid. */
1079 1.1 christos
1080 1.1 christos char **
1081 1.1 christos parsefile (char *filename, char **nextline, char *data, long int size)
1082 1.1 christos {
1083 1.1 christos char *p, *end;
1084 1.1 christos char **line = nextline;
1085 1.1 christos
1086 1.1 christos p = data;
1087 1.1 christos end = p + size;
1088 1.1 christos *end = 0;
1089 1.1 christos
1090 1.1 christos while (p != end)
1091 1.1 christos {
1092 1.1 christos if (p[0] != '\\' && p[0] != '@')
1093 1.1 christos return 0;
1094 1.1 christos
1095 1.1 christos *line = p;
1096 1.1 christos
1097 1.1 christos /* Find the first letter of the first field of this line. If it
1098 1.1 christos is different from the first letter of the first field of the
1099 1.1 christos first line, we need initial headers in the output index. */
1100 1.1 christos while (*p && *p != '{')
1101 1.1 christos p++;
1102 1.1 christos if (p == end)
1103 1.1 christos return 0;
1104 1.1 christos p++;
1105 1.1 christos if (first_initial)
1106 1.1 christos {
1107 1.1 christos if (first_initial != toupper (*p))
1108 1.1 christos need_initials = 1;
1109 1.1 christos }
1110 1.1 christos else
1111 1.1 christos first_initial = toupper (*p);
1112 1.1 christos
1113 1.1 christos while (*p && *p != '\n')
1114 1.1 christos p++;
1115 1.1 christos if (p != end)
1116 1.1 christos p++;
1117 1.1 christos
1118 1.1 christos line++;
1119 1.1 christos if (line == linearray + nlines)
1120 1.1 christos {
1121 1.1 christos char **old = linearray;
1122 1.1 christos linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
1123 1.1 christos line += linearray - old;
1124 1.1 christos }
1125 1.1 christos }
1126 1.1 christos
1127 1.1 christos return line;
1128 1.1 christos }
1129 1.1 christos
1130 1.1 christos /* Indexification is a filter applied to the sorted lines
1132 1.1 christos as they are being written to the output file.
1133 1.1 christos Multiple entries for the same name, with different page numbers,
1134 1.1 christos get combined into a single entry with multiple page numbers.
1135 1.1 christos The first braced field, which is used for sorting, is discarded.
1136 1.1 christos However, its first character is examined, folded to lower case,
1137 1.1 christos and if it is different from that in the previous line fed to us
1138 1.1 christos a \initial line is written with one argument, the new initial.
1139 1.1 christos
1140 1.1 christos If an entry has four braced fields, then the second and third
1141 1.1 christos constitute primary and secondary names.
1142 1.1 christos In this case, each change of primary name
1143 1.1 christos generates a \primary line which contains only the primary name,
1144 1.1 christos and in between these are \secondary lines which contain
1145 1.1 christos just a secondary name and page numbers. */
1146 1.1 christos
1147 1.1 christos /* The last primary name we wrote a \primary entry for.
1148 1.1 christos If only one level of indexing is being done, this is the last name seen. */
1149 1.1 christos char *lastprimary;
1150 1.1 christos /* Length of storage allocated for lastprimary. */
1151 1.1 christos int lastprimarylength;
1152 1.1 christos
1153 1.1 christos /* Similar, for the secondary name. */
1154 1.1 christos char *lastsecondary;
1155 1.1 christos int lastsecondarylength;
1156 1.1 christos
1157 1.1 christos /* Zero if we are not in the middle of writing an entry.
1158 1.1 christos One if we have written the beginning of an entry but have not
1159 1.1 christos yet written any page numbers into it.
1160 1.1 christos Greater than one if we have written the beginning of an entry
1161 1.1 christos plus at least one page number. */
1162 1.1 christos int pending;
1163 1.1 christos
1164 1.1 christos /* The initial (for sorting purposes) of the last primary entry written.
1165 1.1 christos When this changes, a \initial {c} line is written */
1166 1.1 christos
1167 1.1 christos char *lastinitial;
1168 1.1 christos
1169 1.1 christos int lastinitiallength;
1170 1.1 christos
1171 1.1 christos /* When we need a string of length 1 for the value of lastinitial,
1172 1.1 christos store it here. */
1173 1.1 christos
1174 1.1 christos char lastinitial1[2];
1175 1.1 christos
1176 1.1 christos /* Initialize static storage for writing an index. */
1177 1.1 christos
1178 1.1 christos void
1179 1.1 christos init_index (void)
1180 1.1 christos {
1181 1.1 christos pending = 0;
1182 1.1 christos lastinitial = lastinitial1;
1183 1.1 christos lastinitial1[0] = 0;
1184 1.1 christos lastinitial1[1] = 0;
1185 1.1 christos lastinitiallength = 0;
1186 1.1 christos lastprimarylength = 100;
1187 1.1 christos lastprimary = (char *) xmalloc (lastprimarylength + 1);
1188 1.1 christos memset (lastprimary, '\0', lastprimarylength + 1);
1189 1.1 christos lastsecondarylength = 100;
1190 1.1 christos lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
1191 1.1 christos memset (lastsecondary, '\0', lastsecondarylength + 1);
1192 1.1 christos }
1193 1.1 christos
1194 1.1 christos /* Indexify. Merge entries for the same name,
1195 1.1 christos insert headers for each initial character, etc. */
1196 1.1 christos
1197 1.1 christos void
1198 1.1 christos indexify (char *line, FILE *ostream)
1199 1.1 christos {
1200 1.1 christos char *primary, *secondary, *pagenumber;
1201 1.1 christos int primarylength, secondarylength = 0, pagelength;
1202 1.1 christos int nosecondary;
1203 1.1 christos int initiallength;
1204 1.1 christos char *initial;
1205 1.1 christos char initial1[2];
1206 1.1 christos register char *p;
1207 1.1 christos
1208 1.1 christos /* First, analyze the parts of the entry fed to us this time. */
1209 1.1 christos
1210 1.1 christos p = find_braced_pos (line, 0, 0, 0);
1211 1.1 christos if (*p == '{')
1212 1.1 christos {
1213 1.1 christos initial = p;
1214 1.1 christos /* Get length of inner pair of braces starting at `p',
1215 1.1 christos including that inner pair of braces. */
1216 1.1 christos initiallength = find_braced_end (p + 1) + 1 - p;
1217 1.1 christos }
1218 1.1 christos else
1219 1.1 christos {
1220 1.1 christos initial = initial1;
1221 1.1 christos initial1[0] = toupper (*p);
1222 1.1 christos initial1[1] = 0;
1223 1.1 christos initiallength = 1;
1224 1.1 christos }
1225 1.1 christos
1226 1.1 christos pagenumber = find_braced_pos (line, 1, 0, 0);
1227 1.1 christos pagelength = find_braced_end (pagenumber) - pagenumber;
1228 1.1 christos if (pagelength == 0)
1229 1.1 christos fatal (_("No page number in %s"), line);
1230 1.1 christos
1231 1.1 christos primary = find_braced_pos (line, 2, 0, 0);
1232 1.1 christos primarylength = find_braced_end (primary) - primary;
1233 1.1 christos
1234 1.1 christos secondary = find_braced_pos (line, 3, 0, 0);
1235 1.1 christos nosecondary = !*secondary;
1236 1.1 christos if (!nosecondary)
1237 1.1 christos secondarylength = find_braced_end (secondary) - secondary;
1238 1.1 christos
1239 1.1 christos /* If the primary is different from before, make a new primary entry. */
1240 1.1 christos if (strncmp (primary, lastprimary, primarylength))
1241 1.1 christos {
1242 1.1 christos /* Close off current secondary entry first, if one is open. */
1243 1.1 christos if (pending)
1244 1.1 christos {
1245 1.1 christos fputs ("}\n", ostream);
1246 1.1 christos pending = 0;
1247 1.1 christos }
1248 1.1 christos
1249 1.1 christos /* If this primary has a different initial, include an entry for
1250 1.1 christos the initial. */
1251 1.1 christos if (need_initials &&
1252 1.1 christos (initiallength != lastinitiallength ||
1253 1.1 christos strncmp (initial, lastinitial, initiallength)))
1254 1.1 christos {
1255 1.1 christos fprintf (ostream, "\\initial {");
1256 1.1 christos fwrite (initial, 1, initiallength, ostream);
1257 1.1 christos fputs ("}\n", ostream);
1258 1.1 christos if (initial == initial1)
1259 1.1 christos {
1260 1.1 christos lastinitial = lastinitial1;
1261 1.1 christos *lastinitial1 = *initial1;
1262 1.1 christos }
1263 1.1 christos else
1264 1.1 christos {
1265 1.1 christos lastinitial = initial;
1266 1.1 christos }
1267 1.1 christos lastinitiallength = initiallength;
1268 1.1 christos }
1269 1.1 christos
1270 1.1 christos /* Make the entry for the primary. */
1271 1.1 christos if (nosecondary)
1272 1.1 christos fputs ("\\entry {", ostream);
1273 1.1 christos else
1274 1.1 christos fputs ("\\primary {", ostream);
1275 1.1 christos fwrite (primary, primarylength, 1, ostream);
1276 1.1 christos if (nosecondary)
1277 1.1 christos {
1278 1.1 christos fputs ("}{", ostream);
1279 1.1 christos pending = 1;
1280 1.1 christos }
1281 1.1 christos else
1282 1.1 christos fputs ("}\n", ostream);
1283 1.1 christos
1284 1.1 christos /* Record name of most recent primary. */
1285 1.1 christos if (lastprimarylength < primarylength)
1286 1.1 christos {
1287 1.1 christos lastprimarylength = primarylength + 100;
1288 1.1 christos lastprimary = (char *) xrealloc (lastprimary,
1289 1.1 christos 1 + lastprimarylength);
1290 1.1 christos }
1291 1.1 christos strncpy (lastprimary, primary, primarylength);
1292 1.1 christos lastprimary[primarylength] = 0;
1293 1.1 christos
1294 1.1 christos /* There is no current secondary within this primary, now. */
1295 1.1 christos lastsecondary[0] = 0;
1296 1.1 christos }
1297 1.1 christos
1298 1.1 christos /* Should not have an entry with no subtopic following one with a
1299 1.1 christos subtopic. */
1300 1.1 christos
1301 1.1 christos if (nosecondary && *lastsecondary)
1302 1.1 christos error (_("entry %s follows an entry with a secondary name"), line);
1303 1.1 christos
1304 1.1 christos /* Start a new secondary entry if necessary. */
1305 1.1 christos if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1306 1.1 christos {
1307 1.1 christos if (pending)
1308 1.1 christos {
1309 1.1 christos fputs ("}\n", ostream);
1310 1.1 christos pending = 0;
1311 1.1 christos }
1312 1.1 christos
1313 1.1 christos /* Write the entry for the secondary. */
1314 1.1 christos fputs ("\\secondary {", ostream);
1315 1.1 christos fwrite (secondary, secondarylength, 1, ostream);
1316 1.1 christos fputs ("}{", ostream);
1317 1.1 christos pending = 1;
1318 1.1 christos
1319 1.1 christos /* Record name of most recent secondary. */
1320 1.1 christos if (lastsecondarylength < secondarylength)
1321 1.1 christos {
1322 1.1 christos lastsecondarylength = secondarylength + 100;
1323 1.1 christos lastsecondary = (char *) xrealloc (lastsecondary,
1324 1.1 christos 1 + lastsecondarylength);
1325 1.1 christos }
1326 1.1 christos strncpy (lastsecondary, secondary, secondarylength);
1327 1.1 christos lastsecondary[secondarylength] = 0;
1328 1.1 christos }
1329 1.1 christos
1330 1.1 christos /* Here to add one more page number to the current entry. */
1331 1.1 christos if (pending++ != 1)
1332 1.1 christos fputs (", ", ostream); /* Punctuate first, if this is not the first. */
1333 1.1 christos fwrite (pagenumber, pagelength, 1, ostream);
1334 1.1 christos }
1335 1.1 christos
1336 1.1 christos /* Close out any unfinished output entry. */
1337 1.1 christos
1338 1.1 christos void
1339 1.1 christos finish_index (FILE *ostream)
1340 1.1 christos {
1341 1.1 christos if (pending)
1342 1.1 christos fputs ("}\n", ostream);
1343 1.1 christos free (lastprimary);
1344 1.1 christos free (lastsecondary);
1345 1.1 christos }
1346 1.1 christos
1347 1.1 christos /* Copy the lines in the sorted order.
1349 1.1 christos Each line is copied out of the input file it was found in. */
1350 1.1 christos
1351 1.1 christos void
1352 1.1 christos writelines (char **linearray, int nlines, FILE *ostream)
1353 1.1 christos {
1354 1.1 christos char **stop_line = linearray + nlines;
1355 1.1 christos char **next_line;
1356 1.1 christos
1357 1.1 christos init_index ();
1358 1.1 christos
1359 1.1 christos /* Output the text of the lines, and free the buffer space. */
1360 1.1 christos
1361 1.1 christos for (next_line = linearray; next_line != stop_line; next_line++)
1362 1.1 christos {
1363 1.1 christos /* If -u was specified, output the line only if distinct from
1364 1.1 christos previous one. */
1365 1.1 christos if (next_line == linearray
1366 1.1 christos /* Compare previous line with this one, using only the
1367 1.1 christos explicitly specd keyfields. */
1368 1.1 christos || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1369 1.1 christos num_keyfields - 1))
1370 1.1 christos {
1371 1.1 christos char *p = *next_line;
1372 1.1 christos char c;
1373 1.1 christos
1374 1.1 christos while ((c = *p++) && c != '\n')
1375 1.1 christos /* Do nothing. */ ;
1376 1.1 christos *(p - 1) = 0;
1377 1.1 christos indexify (*next_line, ostream);
1378 1.1 christos }
1379 1.1 christos }
1380 1.1 christos
1381 1.1 christos finish_index (ostream);
1382 1.1 christos }
1383 1.1 christos
1384 1.1 christos /* Assume (and optionally verify) that each input file is sorted;
1386 1.1 christos merge them and output the result.
1387 1.1 christos Returns nonzero if any input file fails to be sorted.
1388 1.1 christos
1389 1.1 christos This is the high-level interface that can handle an unlimited
1390 1.1 christos number of files. */
1391 1.1 christos
1392 1.1 christos #define MAX_DIRECT_MERGE 10
1393 1.1 christos
1394 1.1 christos int
1395 1.1 christos merge_files (char **infiles, int nfiles, char *outfile)
1396 1.1 christos {
1397 1.1 christos char **tempfiles;
1398 1.1 christos int ntemps;
1399 1.1 christos int i;
1400 1.1 christos int value = 0;
1401 1.1 christos int start_tempcount = tempcount;
1402 1.1 christos
1403 1.1 christos if (nfiles <= MAX_DIRECT_MERGE)
1404 1.1 christos return merge_direct (infiles, nfiles, outfile);
1405 1.1 christos
1406 1.1 christos /* Merge groups of MAX_DIRECT_MERGE input files at a time,
1407 1.1 christos making a temporary file to hold each group's result. */
1408 1.1 christos
1409 1.1 christos ntemps = (nfiles + MAX_DIRECT_MERGE - 1) / MAX_DIRECT_MERGE;
1410 1.1 christos tempfiles = (char **) xmalloc (ntemps * sizeof (char *));
1411 1.1 christos for (i = 0; i < ntemps; i++)
1412 1.1 christos {
1413 1.1 christos int nf = MAX_DIRECT_MERGE;
1414 1.1 christos if (i + 1 == ntemps)
1415 1.1 christos nf = nfiles - i * MAX_DIRECT_MERGE;
1416 1.1 christos tempfiles[i] = maketempname (++tempcount);
1417 1.1 christos value |= merge_direct (&infiles[i * MAX_DIRECT_MERGE], nf, tempfiles[i]);
1418 1.1 christos }
1419 1.1 christos
1420 1.1 christos /* All temporary files that existed before are no longer needed
1421 1.1 christos since their contents have been merged into our new tempfiles.
1422 1.1 christos So delete them. */
1423 1.1 christos flush_tempfiles (start_tempcount);
1424 1.1 christos
1425 1.1 christos /* Now merge the temporary files we created. */
1426 1.1 christos
1427 1.1 christos merge_files (tempfiles, ntemps, outfile);
1428 1.1 christos
1429 1.1 christos free (tempfiles);
1430 1.1 christos
1431 1.1 christos return value;
1432 1.1 christos }
1433 1.1 christos
1434 1.1 christos /* Assume (and optionally verify) that each input file is sorted;
1436 1.1 christos merge them and output the result.
1437 1.1 christos Returns nonzero if any input file fails to be sorted.
1438 1.1 christos
1439 1.1 christos This version of merging will not work if the number of
1440 1.1 christos input files gets too high. Higher level functions
1441 1.1 christos use it only with a bounded number of input files. */
1442 1.1 christos
1443 1.1 christos int
1444 1.1 christos merge_direct (char **infiles, int nfiles, char *outfile)
1445 1.1 christos {
1446 1.1 christos struct linebuffer *lb1, *lb2;
1447 1.1 christos struct linebuffer **thisline, **prevline;
1448 1.1 christos FILE **streams;
1449 1.1 christos int i;
1450 1.1 christos int nleft;
1451 1.1 christos int lossage = 0;
1452 1.1 christos int *file_lossage;
1453 1.1 christos struct linebuffer *prev_out = 0;
1454 1.1 christos FILE *ostream = stdout;
1455 1.1 christos
1456 1.1 christos if (outfile)
1457 1.1 christos {
1458 1.1 christos ostream = fopen (outfile, "w");
1459 1.1 christos }
1460 1.1 christos if (!ostream)
1461 1.1 christos pfatal_with_name (outfile);
1462 1.1 christos
1463 1.1 christos init_index ();
1464 1.1 christos
1465 1.1 christos if (nfiles == 0)
1466 1.1 christos {
1467 1.1 christos if (outfile)
1468 1.1 christos fclose (ostream);
1469 1.1 christos return 0;
1470 1.1 christos }
1471 1.1 christos
1472 1.1 christos /* For each file, make two line buffers. Also, for each file, there
1473 1.1 christos is an element of `thisline' which points at any time to one of the
1474 1.1 christos file's two buffers, and an element of `prevline' which points to
1475 1.1 christos the other buffer. `thisline' is supposed to point to the next
1476 1.1 christos available line from the file, while `prevline' holds the last file
1477 1.1 christos line used, which is remembered so that we can verify that the file
1478 1.1 christos is properly sorted. */
1479 1.1 christos
1480 1.1 christos /* lb1 and lb2 contain one buffer each per file. */
1481 1.1 christos lb1 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1482 1.1 christos lb2 = (struct linebuffer *) xmalloc (nfiles * sizeof (struct linebuffer));
1483 1.1 christos
1484 1.1 christos /* thisline[i] points to the linebuffer holding the next available
1485 1.1 christos line in file i, or is zero if there are no lines left in that file. */
1486 1.1 christos thisline = (struct linebuffer **)
1487 1.1 christos xmalloc (nfiles * sizeof (struct linebuffer *));
1488 1.1 christos /* prevline[i] points to the linebuffer holding the last used line
1489 1.1 christos from file i. This is just for verifying that file i is properly
1490 1.1 christos sorted. */
1491 1.1 christos prevline = (struct linebuffer **)
1492 1.1 christos xmalloc (nfiles * sizeof (struct linebuffer *));
1493 1.1 christos /* streams[i] holds the input stream for file i. */
1494 1.1 christos streams = (FILE **) xmalloc (nfiles * sizeof (FILE *));
1495 1.1 christos /* file_lossage[i] is nonzero if we already know file i is not
1496 1.1 christos properly sorted. */
1497 1.1 christos file_lossage = (int *) xmalloc (nfiles * sizeof (int));
1498 1.1 christos
1499 1.1 christos /* Allocate and initialize all that storage. */
1500 1.1 christos
1501 1.1 christos for (i = 0; i < nfiles; i++)
1502 1.1 christos {
1503 1.1 christos initbuffer (&lb1[i]);
1504 1.1 christos initbuffer (&lb2[i]);
1505 1.1 christos thisline[i] = &lb1[i];
1506 1.1 christos prevline[i] = &lb2[i];
1507 1.1 christos file_lossage[i] = 0;
1508 1.1 christos streams[i] = fopen (infiles[i], "r");
1509 1.1 christos if (!streams[i])
1510 1.1 christos pfatal_with_name (infiles[i]);
1511 1.1 christos
1512 1.1 christos readline (thisline[i], streams[i]);
1513 1.1 christos }
1514 1.1 christos
1515 1.1 christos /* Keep count of number of files not at eof. */
1516 1.1 christos nleft = nfiles;
1517 1.1 christos
1518 1.1 christos while (nleft)
1519 1.1 christos {
1520 1.1 christos struct linebuffer *best = 0;
1521 1.1 christos struct linebuffer *exch;
1522 1.1 christos int bestfile = -1;
1523 1.1 christos int i;
1524 1.1 christos
1525 1.1 christos /* Look at the next avail line of each file; choose the least one. */
1526 1.1 christos
1527 1.1 christos for (i = 0; i < nfiles; i++)
1528 1.1 christos {
1529 1.1 christos if (thisline[i] &&
1530 1.1 christos (!best ||
1531 1.1 christos 0 < compare_general (best->buffer, thisline[i]->buffer,
1532 1.1 christos (long) bestfile, (long) i, num_keyfields)))
1533 1.1 christos {
1534 1.1 christos best = thisline[i];
1535 1.1 christos bestfile = i;
1536 1.1 christos }
1537 1.1 christos }
1538 1.1 christos
1539 1.1 christos /* Output that line, unless it matches the previous one and we
1540 1.1 christos don't want duplicates. */
1541 1.1 christos
1542 1.1 christos if (!(prev_out &&
1543 1.1 christos !compare_general (prev_out->buffer,
1544 1.1 christos best->buffer, 0L, 1L, num_keyfields - 1)))
1545 1.1 christos indexify (best->buffer, ostream);
1546 1.1 christos prev_out = best;
1547 1.1 christos
1548 1.1 christos /* Now make the line the previous of its file, and fetch a new
1549 1.1 christos line from that file. */
1550 1.1 christos
1551 1.1 christos exch = prevline[bestfile];
1552 1.1 christos prevline[bestfile] = thisline[bestfile];
1553 1.1 christos thisline[bestfile] = exch;
1554 1.1 christos
1555 1.1 christos while (1)
1556 1.1 christos {
1557 1.1 christos /* If the file has no more, mark it empty. */
1558 1.1 christos
1559 1.1 christos if (feof (streams[bestfile]))
1560 1.1 christos {
1561 1.1 christos thisline[bestfile] = 0;
1562 1.1 christos /* Update the number of files still not empty. */
1563 1.1 christos nleft--;
1564 1.1 christos break;
1565 1.1 christos }
1566 1.1 christos readline (thisline[bestfile], streams[bestfile]);
1567 1.1 christos if (thisline[bestfile]->buffer[0] || !feof (streams[bestfile]))
1568 1.1 christos break;
1569 1.1 christos }
1570 1.1 christos }
1571 1.1 christos
1572 1.1 christos finish_index (ostream);
1573 1.1 christos
1574 1.1 christos /* Free all storage and close all input streams. */
1575 1.1 christos
1576 1.1 christos for (i = 0; i < nfiles; i++)
1577 1.1 christos {
1578 1.1 christos fclose (streams[i]);
1579 1.1 christos free (lb1[i].buffer);
1580 1.1 christos free (lb2[i].buffer);
1581 1.1 christos }
1582 1.1 christos free (file_lossage);
1583 1.1 christos free (lb1);
1584 1.1 christos free (lb2);
1585 1.1 christos free (thisline);
1586 1.1 christos free (prevline);
1587 1.1 christos free (streams);
1588 1.1 christos
1589 1.1 christos if (outfile)
1590 1.1 christos fclose (ostream);
1591 1.1 christos
1592 1.1 christos return lossage;
1593 1.1 christos }
1594 1.1 christos
1595 1.1 christos /* Print error message and exit. */
1597 1.1 christos
1598 1.1 christos void
1599 1.1 christos fatal (const char *format, const char *arg)
1600 1.1 christos {
1601 1.1 christos error (format, arg);
1602 1.1 christos xexit (1);
1603 1.1 christos }
1604 1.1 christos
1605 1.1 christos /* Print error message. FORMAT is printf control string, ARG is arg for it. */
1606 1.1 christos void
1607 1.1 christos error (const char *format, const char *arg)
1608 1.1 christos {
1609 1.1 christos printf ("%s: ", program_name);
1610 1.1 christos printf (format, arg);
1611 1.1 christos if (format[strlen (format) -1] != '\n')
1612 1.1 christos printf ("\n");
1613 1.1 christos }
1614 1.1 christos
1615 1.1 christos void
1616 1.1 christos perror_with_name (const char *name)
1617 1.1 christos {
1618 1.1 christos fprintf (stderr, "%s: ", program_name);
1619 1.1 christos perror (name);
1620 1.1 christos }
1621 1.1 christos
1622 1.1 christos void
1623 1.1 christos pfatal_with_name (const char *name)
1624 1.1 christos {
1625 1.1 christos perror_with_name (name);
1626 1.1 christos xexit (1);
1627 1.1 christos }
1628 1.1 christos
1629 1.1 christos
1630 1.1 christos /* Return a newly-allocated string concatenating S1 and S2. */
1632
1633 char *
1634 concat (char *s1, char *s2)
1635 {
1636 int len1 = strlen (s1), len2 = strlen (s2);
1637 char *result = (char *) xmalloc (len1 + len2 + 1);
1638
1639 strcpy (result, s1);
1640 strcpy (result + len1, s2);
1641 *(result + len1 + len2) = 0;
1642
1643 return result;
1644 }
1645