texindex.c revision 1.5 1 /* $NetBSD: texindex.c,v 1.5 2025/12/30 10:35:22 martin Exp $ */
2
3 /* texindex -- sort TeX index dribble output into an actual index.
4 Id: texindex.c,v 1.11 2004/04/11 17:56:47 karl Exp
5
6 Copyright (C) 1987, 1991, 1992, 1996, 1997, 1998, 1999, 2000, 2001,
7 2002, 2003, 2004 Free Software Foundation, Inc.
8
9 This program is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 This program is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with this program; if not, write to the Free Software
21 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307. */
22
23 #include "system.h"
24 #include <getopt.h>
25
26 static char *program_name = "texindex";
27
28 #if defined (emacs)
29 # include "../src/config.h"
30 /* Some s/os.h files redefine these. */
31 # undef read
32 # undef close
33 # undef write
34 # undef open
35 #endif
36
37 #if !defined (HAVE_MEMSET)
38 #undef memset
39 #define memset(ptr, ignore, count) bzero (ptr, count)
40 #endif
41
42 #if !defined (SEEK_SET)
43 # define SEEK_SET 0
44 # define SEEK_CUR 1
45 # define SEEK_END 2
46 #endif /* !SEEK_SET */
47
48 /* When sorting in core, this structure describes one line
49 and the position and length of its first keyfield. */
50 struct lineinfo
51 {
52 char *text; /* The actual text of the line. */
53 union {
54 char *text; /* The start of the key (for textual comparison). */
55 long number; /* The numeric value (for numeric comparison). */
56 } key;
57 long keylen; /* Length of KEY field. */
58 size_t idx; /* tie breaker */
59 };
60
61 /* This structure describes a field to use as a sort key. */
62 struct keyfield
63 {
64 int startwords; /* Number of words to skip. */
65 int startchars; /* Number of additional chars to skip. */
66 int endwords; /* Number of words to ignore at end. */
67 int endchars; /* Ditto for characters of last word. */
68 char ignore_blanks; /* Non-zero means ignore spaces and tabs. */
69 char fold_case; /* Non-zero means case doesn't matter. */
70 char reverse; /* Non-zero means compare in reverse order. */
71 char numeric; /* Non-zeros means field is ASCII numeric. */
72 char positional; /* Sort according to file position. */
73 char braced; /* Count balanced-braced groupings as fields. */
74 };
75
76 /* Vector of keyfields to use. */
77 struct keyfield keyfields[3];
78
79 /* Number of keyfields stored in that vector. */
80 int num_keyfields = 3;
81
82 /* Vector of input file names, terminated with a null pointer. */
83 char **infiles;
84
85 /* Vector of corresponding output file names, or NULL, meaning default it
86 (add an `s' to the end). */
87 char **outfiles;
88
89 /* Length of `infiles'. */
90 int num_infiles;
91
92 /* Pointer to the array of pointers to lines being sorted. */
93 char **linearray;
94
95 /* The allocated length of `linearray'. */
96 long nlines;
97
98 /* During in-core sort, this points to the base of the data block
99 which contains all the lines of data. */
100 char *text_base;
101
102 /* Initially 0; changed to 1 if we want initials in this index. */
103 int need_initials;
104
105 /* Remembers the first initial letter seen in this index, so we can
106 determine whether we need initials in the sorted form. */
107 char first_initial;
108
109 /* Forward declarations of functions in this file. */
110 void decode_command (int argc, char **argv);
111 void sort_in_core (char *infile, int total, char *outfile);
112 char **parsefile (char *filename, char **nextline, char *data, long int size);
113 char *find_field (struct keyfield *keyfield, char *str, long int *lengthptr);
114 char *find_pos (char *str, int words, int chars, int ignore_blanks);
115 long find_value (char *start, long int length);
116 char *find_braced_pos (char *str, int words, int chars, int ignore_blanks);
117 char *find_braced_end (char *str);
118 void writelines (char **linearray, int nlines, FILE *ostream);
119 int compare_field (struct keyfield *keyfield, char *start1,
120 long int length1, long int pos1, char *start2,
121 long int length2, long int pos2);
122 int compare_full (const void *, const void *);
123 void pfatal_with_name (const char *name);
124 void fatal (const char *format, const char *arg);
125 void error (const char *format, const char *arg);
126 void *xmalloc (), *xrealloc ();
127 static char *concat3 (const char *, const char *, const char *);
128
129 int
131 main (int argc, char **argv)
132 {
133 int i;
134
135 #ifdef HAVE_SETLOCALE
136 /* Set locale via LC_ALL. */
137 setlocale (LC_ALL, "");
138 #endif
139
140 /* Set the text message domain. */
141 bindtextdomain (PACKAGE, LOCALEDIR);
142 textdomain (PACKAGE);
143
144 /* In case we write to a redirected stdout that fails. */
145 /* not ready atexit (close_stdout); */
146
147 /* Describe the kind of sorting to do. */
148 /* The first keyfield uses the first braced field and folds case. */
149 keyfields[0].braced = 1;
150 keyfields[0].fold_case = 1;
151 keyfields[0].endwords = -1;
152 keyfields[0].endchars = -1;
153
154 /* The second keyfield uses the second braced field, numerically. */
155 keyfields[1].braced = 1;
156 keyfields[1].numeric = 1;
157 keyfields[1].startwords = 1;
158 keyfields[1].endwords = -1;
159 keyfields[1].endchars = -1;
160
161 /* The third keyfield (which is ignored while discarding duplicates)
162 compares the whole line. */
163 keyfields[2].endwords = -1;
164 keyfields[2].endchars = -1;
165
166 decode_command (argc, argv);
167
168 /* Process input files completely, one by one. */
169
170 for (i = 0; i < num_infiles; i++)
171 {
172 int desc;
173 off_t ptr;
174 char *outfile;
175 struct stat instat;
176
177 desc = open (infiles[i], O_RDONLY, 0);
178 if (desc < 0)
179 pfatal_with_name (infiles[i]);
180
181 if (stat (infiles[i], &instat))
182 pfatal_with_name (infiles[i]);
183 if (S_ISDIR (instat.st_mode))
184 {
185 #ifdef EISDIR
186 errno = EISDIR;
187 #endif
188 pfatal_with_name (infiles[i]);
189 }
190
191 lseek (desc, (off_t) 0, SEEK_END);
192 ptr = (off_t) lseek (desc, (off_t) 0, SEEK_CUR);
193
194 close (desc);
195
196 outfile = outfiles[i];
197 if (!outfile)
198 outfile = concat3 (infiles[i], "s", "");
199
200 need_initials = 0;
201 first_initial = '\0';
202
203 if (ptr != (int)ptr)
204 {
205 fprintf (stderr, "%s: %s: file too large\n", program_name,
206 infiles[i]);
207 xexit (1);
208 }
209 sort_in_core (infiles[i], (int)ptr, outfile);
210 }
211
212 xexit (0);
213 return 0; /* Avoid bogus warnings. */
214 }
215
216 typedef struct
218 {
219 char *long_name;
220 char *short_name;
221 int *variable_ref;
222 int variable_value;
223 char *arg_name;
224 char *doc_string;
225 } TEXINDEX_OPTION;
226
227 TEXINDEX_OPTION texindex_options[] = {
228 { "--help", "-h", (int *)NULL, 0, (char *)NULL,
229 N_("display this help and exit") },
230 { "--output", "-o", (int *)NULL, 0, "FILE",
231 N_("send output to FILE") },
232 { "--version", (char *)NULL, (int *)NULL, 0, (char *)NULL,
233 N_("display version information and exit") },
234 { (char *)NULL, (char *)NULL, (int *)NULL, 0, (char *)NULL }
235 };
236
237 void
238 usage (int result_value)
239 {
240 register int i;
241 FILE *f = result_value ? stderr : stdout;
242
243 fprintf (f, _("Usage: %s [OPTION]... FILE...\n"), program_name);
244 fprintf (f, _("Generate a sorted index for each TeX output FILE.\n"));
245 /* Avoid trigraph nonsense. */
246 fprintf (f,
247 _("Usually FILE... is specified as `foo.%c%c\' for a document `foo.texi'.\n"),
248 '?', '?'); /* avoid trigraph in cat-id-tbl.c */
249 fprintf (f, _("\nOptions:\n"));
250
251 for (i = 0; texindex_options[i].long_name; i++)
252 {
253 putc (' ', f);
254
255 if (texindex_options[i].short_name)
256 fprintf (f, "%s, ", texindex_options[i].short_name);
257
258 fprintf (f, "%s %s",
259 texindex_options[i].long_name,
260 texindex_options[i].arg_name
261 ? texindex_options[i].arg_name : "");
262
263 fprintf (f, "\t%s\n", _(texindex_options[i].doc_string));
264 }
265 fputs (_("\n\
266 Email bug reports to bug-texinfo (at) gnu.org,\n\
267 general questions and discussion to help-texinfo (at) gnu.org.\n\
268 Texinfo home page: http://www.gnu.org/software/texinfo/"), f);
269 fputs ("\n", f);
270
271 xexit (result_value);
272 }
273
274 /* Decode the command line arguments to set the parameter variables
275 and set up the vector of keyfields and the vector of input files. */
276
277 void
278 decode_command (int argc, char **argv)
279 {
280 int arg_index = 1;
281 char **ip;
282 char **op;
283
284 /* Allocate ARGC input files, which must be enough. */
285
286 infiles = (char **) xmalloc (argc * sizeof (char *));
287 outfiles = (char **) xmalloc (argc * sizeof (char *));
288 ip = infiles;
289 op = outfiles;
290
291 while (arg_index < argc)
292 {
293 char *arg = argv[arg_index++];
294
295 if (*arg == '-')
296 {
297 if (strcmp (arg, "--version") == 0)
298 {
299 printf ("texindex (GNU %s) %s\n", PACKAGE, VERSION);
300 puts ("");
301 puts ("Copyright (C) 2004 Free Software Foundation, Inc.");
302 printf (_("There is NO warranty. You may redistribute this software\n\
303 under the terms of the GNU General Public License.\n\
304 For more information about these matters, see the files named COPYING.\n"));
305 xexit (0);
306 }
307 else if ((strcmp (arg, "--keep") == 0) ||
308 (strcmp (arg, "-k") == 0))
309 {
310 /* Ignore, for backward compatibility */
311 }
312 else if ((strcmp (arg, "--help") == 0) ||
313 (strcmp (arg, "-h") == 0))
314 {
315 usage (0);
316 }
317 else if ((strcmp (arg, "--output") == 0) ||
318 (strcmp (arg, "-o") == 0))
319 {
320 if (argv[arg_index] != (char *)NULL)
321 {
322 arg_index++;
323 if (op > outfiles)
324 *(op - 1) = argv[arg_index];
325 }
326 else
327 usage (1);
328 }
329 else
330 usage (1);
331 }
332 else
333 {
334 *ip++ = arg;
335 *op++ = (char *)NULL;
336 }
337 }
338
339 /* Record number of keyfields and terminate list of filenames. */
340 num_infiles = ip - infiles;
341 *ip = (char *)NULL;
342 if (num_infiles == 0)
343 usage (1);
344 }
345
346 /* Compare LINE1 and LINE2 according to the specified set of keyfields. */
348
349 int
350 compare_full (const void *p1, const void *p2)
351 {
352 char **line1 = (char **) p1;
353 char **line2 = (char **) p2;
354 int i;
355
356 /* Compare using the first keyfield;
357 if that does not distinguish the lines, try the second keyfield;
358 and so on. */
359
360 for (i = 0; i < num_keyfields; i++)
361 {
362 long length1, length2;
363 char *start1 = find_field (&keyfields[i], *line1, &length1);
364 char *start2 = find_field (&keyfields[i], *line2, &length2);
365 int tem = compare_field (&keyfields[i], start1, length1,
366 *line1 - text_base,
367 start2, length2, *line2 - text_base);
368 if (tem)
369 {
370 if (keyfields[i].reverse)
371 return -tem;
372 return tem;
373 }
374 }
375
376 if (*line1 == *line2)
377 abort ();
378 return *line1 < *line2 ? -1 : 1;
379 }
380
381 /* Compare LINE1 and LINE2, described by structures
382 in which the first keyfield is identified in advance.
383 For positional sorting, assumes that the order of the lines in core
384 reflects their nominal order. */
385 int
386 compare_prepared (const void *p1, const void *p2)
387 {
388 struct lineinfo *line1 = (struct lineinfo *) p1;
389 struct lineinfo *line2 = (struct lineinfo *) p2;
390 int i;
391 int tem;
392 char *text1, *text2;
393
394 /* Compare using the first keyfield, which has been found for us already. */
395 if (keyfields->positional)
396 {
397 if (line1->text - text_base > line2->text - text_base)
398 tem = 1;
399 else
400 tem = -1;
401 }
402 else if (keyfields->numeric)
403 tem = line1->key.number - line2->key.number;
404 else
405 tem = compare_field (keyfields, line1->key.text, line1->keylen, 0,
406 line2->key.text, line2->keylen, 0);
407 if (tem)
408 {
409 if (keyfields->reverse)
410 return -tem;
411 return tem;
412 }
413
414 text1 = line1->text;
415 text2 = line2->text;
416
417 /* Compare using the second keyfield;
418 if that does not distinguish the lines, try the third keyfield;
419 and so on. */
420
421 for (i = 1; i < num_keyfields; i++)
422 {
423 long length1, length2;
424 char *start1 = find_field (&keyfields[i], text1, &length1);
425 char *start2 = find_field (&keyfields[i], text2, &length2);
426 int tem = compare_field (&keyfields[i], start1, length1,
427 text1 - text_base,
428 start2, length2, text2 - text_base);
429 if (tem)
430 {
431 if (keyfields[i].reverse)
432 return -tem;
433 return tem;
434 }
435 }
436
437 if (line1->idx == line2->idx)
438 abort ();
439 return line1->idx < line2->idx ? -1 : 1;
440 }
441
442 /* Like compare_full but more general.
443 You can pass any strings, and you can say how many keyfields to use.
444 POS1 and POS2 should indicate the nominal positional ordering of
445 the two lines in the input. */
446
447 int
448 compare_general (char *str1, char *str2, long int pos1, long int pos2, int use_keyfields)
449 {
450 int i;
451
452 /* Compare using the first keyfield;
453 if that does not distinguish the lines, try the second keyfield;
454 and so on. */
455
456 for (i = 0; i < use_keyfields; i++)
457 {
458 long length1, length2;
459 char *start1 = find_field (&keyfields[i], str1, &length1);
460 char *start2 = find_field (&keyfields[i], str2, &length2);
461 int tem = compare_field (&keyfields[i], start1, length1, pos1,
462 start2, length2, pos2);
463 if (tem)
464 {
465 if (keyfields[i].reverse)
466 return -tem;
467 return tem;
468 }
469 }
470
471 return 0; /* Lines match exactly. */
472 }
473
474 /* Find the start and length of a field in STR according to KEYFIELD.
475 A pointer to the starting character is returned, and the length
476 is stored into the int that LENGTHPTR points to. */
477
478 char *
479 find_field (struct keyfield *keyfield, char *str, long int *lengthptr)
480 {
481 char *start;
482 char *end;
483 char *(*fun) ();
484
485 if (keyfield->braced)
486 fun = find_braced_pos;
487 else
488 fun = find_pos;
489
490 start = (*fun) (str, keyfield->startwords, keyfield->startchars,
491 keyfield->ignore_blanks);
492 if (keyfield->endwords < 0)
493 {
494 if (keyfield->braced)
495 end = find_braced_end (start);
496 else
497 {
498 end = start;
499 while (*end && *end != '\n')
500 end++;
501 }
502 }
503 else
504 {
505 end = (*fun) (str, keyfield->endwords, keyfield->endchars, 0);
506 if (end - str < start - str)
507 end = start;
508 }
509 *lengthptr = end - start;
510 return start;
511 }
512
513 /* Return a pointer to a specified place within STR,
514 skipping (from the beginning) WORDS words and then CHARS chars.
515 If IGNORE_BLANKS is nonzero, we skip all blanks
516 after finding the specified word. */
517
518 char *
519 find_pos (char *str, int words, int chars, int ignore_blanks)
520 {
521 int i;
522 char *p = str;
523
524 for (i = 0; i < words; i++)
525 {
526 char c;
527 /* Find next bunch of nonblanks and skip them. */
528 while ((c = *p) == ' ' || c == '\t')
529 p++;
530 while ((c = *p) && c != '\n' && !(c == ' ' || c == '\t'))
531 p++;
532 if (!*p || *p == '\n')
533 return p;
534 }
535
536 while (*p == ' ' || *p == '\t')
537 p++;
538
539 for (i = 0; i < chars; i++)
540 {
541 if (!*p || *p == '\n')
542 break;
543 p++;
544 }
545 return p;
546 }
547
548 /* Like find_pos but assumes that each field is surrounded by braces
549 and that braces within fields are balanced. */
550
551 char *
552 find_braced_pos (char *str, int words, int chars, int ignore_blanks)
553 {
554 int i;
555 int bracelevel;
556 char *p = str;
557 char c;
558
559 for (i = 0; i < words; i++)
560 {
561 bracelevel = 1;
562 while ((c = *p++) != '{' && c != '\n' && c)
563 /* Do nothing. */ ;
564 if (c != '{')
565 return p - 1;
566 while (bracelevel)
567 {
568 c = *p++;
569 if (c == '{')
570 bracelevel++;
571 if (c == '}')
572 bracelevel--;
573 if (c == 0 || c == '\n')
574 return p - 1;
575 }
576 }
577
578 while ((c = *p++) != '{' && c != '\n' && c)
579 /* Do nothing. */ ;
580
581 if (c != '{')
582 return p - 1;
583
584 if (ignore_blanks)
585 while ((c = *p) == ' ' || c == '\t')
586 p++;
587
588 for (i = 0; i < chars; i++)
589 {
590 if (!*p || *p == '\n')
591 break;
592 p++;
593 }
594 return p;
595 }
596
597 /* Find the end of the balanced-brace field which starts at STR.
598 The position returned is just before the closing brace. */
599
600 char *
601 find_braced_end (char *str)
602 {
603 int bracelevel;
604 char *p = str;
605 char c;
606
607 bracelevel = 1;
608 while (bracelevel)
609 {
610 c = *p++;
611 if (c == '{')
612 bracelevel++;
613 if (c == '}')
614 bracelevel--;
615 if (c == 0 || c == '\n')
616 return p - 1;
617 }
618 return p - 1;
619 }
620
621 long
622 find_value (char *start, long int length)
623 {
624 while (length != 0L)
625 {
626 if (isdigit (*start))
627 return atol (start);
628 length--;
629 start++;
630 }
631 return 0l;
632 }
633
634 /* Vector used to translate characters for comparison.
635 This is how we make all alphanumerics follow all else,
636 and ignore case in the first sorting. */
637 int char_order[256];
638
639 void
640 init_char_order (void)
641 {
642 int i;
643 for (i = 1; i < 256; i++)
644 char_order[i] = i;
645
646 for (i = '0'; i <= '9'; i++)
647 char_order[i] += 512;
648
649 for (i = 'a'; i <= 'z'; i++)
650 {
651 char_order[i] = 512 + i;
652 char_order[i + 'A' - 'a'] = 512 + i;
653 }
654 }
655
656 /* Compare two fields (each specified as a start pointer and a character count)
657 according to KEYFIELD.
658 The sign of the value reports the relation between the fields. */
659
660 int
661 compare_field (struct keyfield *keyfield, char *start1, long int length1,
662 long int pos1, char *start2, long int length2, long int pos2)
663 {
664 if (keyfields->positional)
665 {
666 if (pos1 > pos2)
667 return 1;
668 else
669 return -1;
670 }
671 if (keyfield->numeric)
672 {
673 long value = find_value (start1, length1) - find_value (start2, length2);
674 if (value > 0)
675 return 1;
676 if (value < 0)
677 return -1;
678 return 0;
679 }
680 else
681 {
682 char *p1 = start1;
683 char *p2 = start2;
684 char *e1 = start1 + length1;
685 char *e2 = start2 + length2;
686
687 while (1)
688 {
689 int c1, c2;
690
691 if (p1 == e1)
692 c1 = 0;
693 else
694 c1 = *p1++;
695 if (p2 == e2)
696 c2 = 0;
697 else
698 c2 = *p2++;
699
700 if (char_order[c1] != char_order[c2])
701 return char_order[c1] - char_order[c2];
702 if (!c1)
703 break;
704 }
705
706 /* Strings are equal except possibly for case. */
707 p1 = start1;
708 p2 = start2;
709 while (1)
710 {
711 int c1, c2;
712
713 if (p1 == e1)
714 c1 = 0;
715 else
716 c1 = *p1++;
717 if (p2 == e2)
718 c2 = 0;
719 else
720 c2 = *p2++;
721
722 if (c1 != c2)
723 /* Reverse sign here so upper case comes out last. */
724 return c2 - c1;
725 if (!c1)
726 break;
727 }
728
729 return 0;
730 }
731 }
732
733 /* Sort INFILE, whose size is TOTAL,
735 assuming that is small enough to be done in-core,
736 then indexify it and send the output to OUTFILE (or to stdout). */
737
738 void
739 sort_in_core (char *infile, int total, char *outfile)
740 {
741 char **nextline;
742 char *data = (char *) xmalloc (total + 1);
743 char *file_data;
744 long file_size;
745 int i;
746 FILE *ostream = stdout;
747 struct lineinfo *lineinfo;
748
749 /* Read the contents of the file into the moby array `data'. */
750
751 int desc = open (infile, O_RDONLY, 0);
752
753 if (desc < 0)
754 fatal (_("failure reopening %s"), infile);
755 for (file_size = 0;;)
756 {
757 i = read (desc, data + file_size, total - file_size);
758 if (i <= 0)
759 break;
760 file_size += i;
761 }
762 file_data = data;
763 data[file_size] = 0;
764
765 close (desc);
766
767 if (file_size > 0 && data[0] != '\\' && data[0] != '@')
768 {
769 error (_("%s: not a texinfo index file"), infile);
770 return;
771 }
772
773 init_char_order ();
774
775 /* Sort routines want to know this address. */
776
777 text_base = data;
778
779 /* Create the array of pointers to lines, with a default size
780 frequently enough. */
781
782 nlines = total / 50;
783 if (!nlines)
784 nlines = 2;
785 linearray = (char **) xmalloc (nlines * sizeof (char *));
786
787 /* `nextline' points to the next free slot in this array.
788 `nlines' is the allocated size. */
789
790 nextline = linearray;
791
792 /* Parse the input file's data, and make entries for the lines. */
793
794 nextline = parsefile (infile, nextline, file_data, file_size);
795 if (nextline == 0)
796 {
797 error (_("%s: not a texinfo index file"), infile);
798 return;
799 }
800
801 /* Sort the lines. */
802
803 /* If we have enough space, find the first keyfield of each line in advance.
804 Make a `struct lineinfo' for each line, which records the keyfield
805 as well as the line, and sort them. */
806
807 lineinfo = malloc ((nextline - linearray) * sizeof (struct lineinfo));
808
809 if (lineinfo)
810 {
811 size_t idx = 0;
812 struct lineinfo *lp;
813 char **p;
814
815 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
816 {
817 lp->idx = idx++;
818 lp->text = *p;
819 lp->key.text = find_field (keyfields, *p, &lp->keylen);
820 if (keyfields->numeric)
821 lp->key.number = find_value (lp->key.text, lp->keylen);
822 }
823
824 qsort (lineinfo, nextline - linearray, sizeof (struct lineinfo),
825 compare_prepared);
826
827 for (lp = lineinfo, p = linearray; p != nextline; lp++, p++)
828 *p = lp->text;
829
830 free (lineinfo);
831 }
832 else
833 qsort (linearray, nextline - linearray, sizeof (char *), compare_full);
834
835 /* Open the output file. */
836
837 if (outfile)
838 {
839 ostream = fopen (outfile, "w");
840 if (!ostream)
841 pfatal_with_name (outfile);
842 }
843
844 writelines (linearray, nextline - linearray, ostream);
845 if (outfile)
846 fclose (ostream);
847
848 free (linearray);
849 free (data);
850 }
851
852 /* Parse an input string in core into lines.
854 DATA is the input string, and SIZE is its length.
855 Data goes in LINEARRAY starting at NEXTLINE.
856 The value returned is the first entry in LINEARRAY still unused.
857 Value 0 means input file contents are invalid. */
858
859 char **
860 parsefile (char *filename, char **nextline, char *data, long int size)
861 {
862 char *p, *end;
863 char **line = nextline;
864
865 p = data;
866 end = p + size;
867 *end = 0;
868
869 while (p != end)
870 {
871 if (p[0] != '\\' && p[0] != '@')
872 return 0;
873
874 *line = p;
875
876 /* Find the first letter of the first field of this line. If it
877 is different from the first letter of the first field of the
878 first line, we need initial headers in the output index. */
879 while (*p && *p != '{')
880 p++;
881 if (p == end)
882 return 0;
883 p++;
884 if (first_initial)
885 {
886 if (first_initial != toupper (*p))
887 need_initials = 1;
888 }
889 else
890 first_initial = toupper (*p);
891
892 while (*p && *p != '\n')
893 p++;
894 if (p != end)
895 p++;
896
897 line++;
898 if (line == linearray + nlines)
899 {
900 char **old = linearray;
901 linearray = xrealloc (linearray, sizeof (char *) * (nlines *= 4));
902 line += linearray - old;
903 }
904 }
905
906 return line;
907 }
908
909 /* Indexification is a filter applied to the sorted lines
911 as they are being written to the output file.
912 Multiple entries for the same name, with different page numbers,
913 get combined into a single entry with multiple page numbers.
914 The first braced field, which is used for sorting, is discarded.
915 However, its first character is examined, folded to lower case,
916 and if it is different from that in the previous line fed to us
917 a \initial line is written with one argument, the new initial.
918
919 If an entry has four braced fields, then the second and third
920 constitute primary and secondary names.
921 In this case, each change of primary name
922 generates a \primary line which contains only the primary name,
923 and in between these are \secondary lines which contain
924 just a secondary name and page numbers. */
925
926 /* The last primary name we wrote a \primary entry for.
927 If only one level of indexing is being done, this is the last name seen. */
928 char *lastprimary;
929 /* Length of storage allocated for lastprimary. */
930 int lastprimarylength;
931
932 /* Similar, for the secondary name. */
933 char *lastsecondary;
934 int lastsecondarylength;
935
936 /* Zero if we are not in the middle of writing an entry.
937 One if we have written the beginning of an entry but have not
938 yet written any page numbers into it.
939 Greater than one if we have written the beginning of an entry
940 plus at least one page number. */
941 int pending;
942
943 /* The initial (for sorting purposes) of the last primary entry written.
944 When this changes, a \initial {c} line is written */
945
946 char *lastinitial;
947
948 int lastinitiallength;
949
950 /* When we need a string of length 1 for the value of lastinitial,
951 store it here. */
952
953 char lastinitial1[2];
954
955 /* Initialize static storage for writing an index. */
956
957 void
958 init_index (void)
959 {
960 pending = 0;
961 lastinitial = lastinitial1;
962 lastinitial1[0] = 0;
963 lastinitial1[1] = 0;
964 lastinitiallength = 0;
965 lastprimarylength = 100;
966 lastprimary = (char *) xmalloc (lastprimarylength + 1);
967 memset (lastprimary, '\0', lastprimarylength + 1);
968 lastsecondarylength = 100;
969 lastsecondary = (char *) xmalloc (lastsecondarylength + 1);
970 memset (lastsecondary, '\0', lastsecondarylength + 1);
971 }
972
973 /* Indexify. Merge entries for the same name,
974 insert headers for each initial character, etc. */
975
976 void
977 indexify (char *line, FILE *ostream)
978 {
979 char *primary, *secondary, *pagenumber;
980 int primarylength, secondarylength = 0, pagelength;
981 int nosecondary;
982 int initiallength;
983 char *initial;
984 char initial1[2];
985 register char *p;
986
987 /* First, analyze the parts of the entry fed to us this time. */
988
989 p = find_braced_pos (line, 0, 0, 0);
990 if (*p == '{')
991 {
992 initial = p;
993 /* Get length of inner pair of braces starting at `p',
994 including that inner pair of braces. */
995 initiallength = find_braced_end (p + 1) + 1 - p;
996 }
997 else
998 {
999 initial = initial1;
1000 initial1[0] = toupper (*p);
1001 initial1[1] = 0;
1002 initiallength = 1;
1003 }
1004
1005 pagenumber = find_braced_pos (line, 1, 0, 0);
1006 pagelength = find_braced_end (pagenumber) - pagenumber;
1007 if (pagelength == 0)
1008 fatal (_("No page number in %s"), line);
1009
1010 primary = find_braced_pos (line, 2, 0, 0);
1011 primarylength = find_braced_end (primary) - primary;
1012
1013 secondary = find_braced_pos (line, 3, 0, 0);
1014 nosecondary = !*secondary;
1015 if (!nosecondary)
1016 secondarylength = find_braced_end (secondary) - secondary;
1017
1018 /* If the primary is different from before, make a new primary entry. */
1019 if (strncmp (primary, lastprimary, primarylength))
1020 {
1021 /* Close off current secondary entry first, if one is open. */
1022 if (pending)
1023 {
1024 fputs ("}\n", ostream);
1025 pending = 0;
1026 }
1027
1028 /* If this primary has a different initial, include an entry for
1029 the initial. */
1030 if (need_initials &&
1031 (initiallength != lastinitiallength ||
1032 strncmp (initial, lastinitial, initiallength)))
1033 {
1034 fprintf (ostream, "\\initial {");
1035 fwrite (initial, 1, initiallength, ostream);
1036 fputs ("}\n", ostream);
1037 if (initial == initial1)
1038 {
1039 lastinitial = lastinitial1;
1040 *lastinitial1 = *initial1;
1041 }
1042 else
1043 {
1044 lastinitial = initial;
1045 }
1046 lastinitiallength = initiallength;
1047 }
1048
1049 /* Make the entry for the primary. */
1050 if (nosecondary)
1051 fputs ("\\entry {", ostream);
1052 else
1053 fputs ("\\primary {", ostream);
1054 fwrite (primary, primarylength, 1, ostream);
1055 if (nosecondary)
1056 {
1057 fputs ("}{", ostream);
1058 pending = 1;
1059 }
1060 else
1061 fputs ("}\n", ostream);
1062
1063 /* Record name of most recent primary. */
1064 if (lastprimarylength < primarylength)
1065 {
1066 lastprimarylength = primarylength + 100;
1067 lastprimary = (char *) xrealloc (lastprimary,
1068 1 + lastprimarylength);
1069 }
1070 strncpy (lastprimary, primary, primarylength);
1071 lastprimary[primarylength] = 0;
1072
1073 /* There is no current secondary within this primary, now. */
1074 lastsecondary[0] = 0;
1075 }
1076
1077 /* Should not have an entry with no subtopic following one with a
1078 subtopic. */
1079
1080 if (nosecondary && *lastsecondary)
1081 error (_("entry %s follows an entry with a secondary name"), line);
1082
1083 /* Start a new secondary entry if necessary. */
1084 if (!nosecondary && strncmp (secondary, lastsecondary, secondarylength))
1085 {
1086 if (pending)
1087 {
1088 fputs ("}\n", ostream);
1089 pending = 0;
1090 }
1091
1092 /* Write the entry for the secondary. */
1093 fputs ("\\secondary {", ostream);
1094 fwrite (secondary, secondarylength, 1, ostream);
1095 fputs ("}{", ostream);
1096 pending = 1;
1097
1098 /* Record name of most recent secondary. */
1099 if (lastsecondarylength < secondarylength)
1100 {
1101 lastsecondarylength = secondarylength + 100;
1102 lastsecondary = (char *) xrealloc (lastsecondary,
1103 1 + lastsecondarylength);
1104 }
1105 strncpy (lastsecondary, secondary, secondarylength);
1106 lastsecondary[secondarylength] = 0;
1107 }
1108
1109 /* Here to add one more page number to the current entry. */
1110 if (pending++ != 1)
1111 fputs (", ", ostream); /* Punctuate first, if this is not the first. */
1112 fwrite (pagenumber, pagelength, 1, ostream);
1113 }
1114
1115 /* Close out any unfinished output entry. */
1116
1117 void
1118 finish_index (FILE *ostream)
1119 {
1120 if (pending)
1121 fputs ("}\n", ostream);
1122 free (lastprimary);
1123 free (lastsecondary);
1124 }
1125
1126 /* Copy the lines in the sorted order.
1128 Each line is copied out of the input file it was found in. */
1129
1130 void
1131 writelines (char **linearray, int nlines, FILE *ostream)
1132 {
1133 char **stop_line = linearray + nlines;
1134 char **next_line;
1135
1136 init_index ();
1137
1138 /* Output the text of the lines, and free the buffer space. */
1139
1140 for (next_line = linearray; next_line != stop_line; next_line++)
1141 {
1142 /* Output the line only if distinct from previous one. */
1143 if (next_line == linearray
1144 /* Compare previous line with this one, using only the
1145 explicitly specd keyfields. */
1146 || compare_general (*(next_line - 1), *next_line, 0L, 0L,
1147 num_keyfields - 1))
1148 {
1149 char *p = *next_line;
1150 char c;
1151
1152 while ((c = *p++) && c != '\n')
1153 /* Do nothing. */ ;
1154 *(p - 1) = 0;
1155 indexify (*next_line, ostream);
1156 }
1157 }
1158
1159 finish_index (ostream);
1160 }
1161
1162 /* Print error message and exit. */
1164
1165 void
1166 fatal (const char *format, const char *arg)
1167 {
1168 error (format, arg);
1169 xexit (1);
1170 }
1171
1172 /* Print error message. FORMAT is printf control string, ARG is arg for it. */
1173 void
1174 error (const char *format, const char *arg)
1175 {
1176 printf ("%s: ", program_name);
1177 printf (format, arg);
1178 if (format[strlen (format) -1] != '\n')
1179 printf ("\n");
1180 }
1181
1182 void
1183 perror_with_name (const char *name)
1184 {
1185 fprintf (stderr, "%s: ", program_name);
1186 perror (name);
1187 }
1188
1189 void
1190 pfatal_with_name (const char *name)
1191 {
1192 perror_with_name (name);
1193 xexit (1);
1194 }
1195
1196
1197 /* Return a newly-allocated string concatenating S1, S2, and S3. */
1199
1200 static char *
1201 concat3 (const char *s1, const char *s2, const char *s3)
1202 {
1203 int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
1204 char *result = (char *) xmalloc (len1 + len2 + len3 + 1);
1205
1206 strcpy (result, s1);
1207 strcpy (result + len1, s2);
1208 strcpy (result + len1 + len2, s3);
1209 *(result + len1 + len2 + len3) = 0;
1210
1211 return result;
1212 }
1213