mkindex.c revision 1.1       1 /*-
      2  * Copyright (c) 1993
      3  *	The Regents of the University of California.  All rights reserved.
      4  *
      5  * This code is derived from software contributed to Berkeley by
      6  * Barry Brachman.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. All advertising materials mentioning features or use of this software
     17  *    must display the following acknowledgement:
     18  *	This product includes software developed by the University of
     19  *	California, Berkeley and its contributors.
     20  * 4. Neither the name of the University nor the names of its contributors
     21  *    may be used to endorse or promote products derived from this software
     22  *    without specific prior written permission.
     23  *
     24  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     25  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     26  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     27  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     28  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     29  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     30  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     31  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     33  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     34  * SUCH DAMAGE.
     35  */
     36 
/*
 * Identification strings embedded in the object file.  Both are
 * compiled out when 'lint' is defined.
 * NOTE(review): the "@(#)" prefix is presumably the marker that what(1)
 * searches for -- confirm before relying on it.
 */
#ifndef lint
static char copyright[] =
    "@(#) Copyright (c) 1993\n"
    "\tThe Regents of the University of California.  All rights reserved.\n";
static char sccsid[] = "@(#)mkindex.c\t8.1 (Berkeley) 6/11/93";
#endif /* not lint */
     46 
     47 #include <stdio.h>
     48 
     49 #include "bog.h"
     50 
     51 char *nextword __P((FILE *, char *, int *, int *));
     52 
     53 int
     54 main(argc, argv)
     55 	int argc;
     56 	char *argv[];
     57 {
     58 	int clen, rlen, prev;
     59 	long off, start;
     60 	char buf[MAXWORDLEN + 1];
     61 
     62 	prev = '\0';
     63 	off = start = 0L;
     64 	while (nextword(stdin, buf, &clen, &rlen) != NULL) {
     65 		if (*buf != prev) {
     66 			if (prev != '\0')
     67 				printf("%c %6ld %6ld\n", prev, start, off - 1);
     68 			prev = *buf;
     69 			start = off;
     70 		}
     71 		off += clen + 1;
     72 	}
     73 	printf("%c %6ld %6ld\n", prev, start, off - 1);
     74 	exit(0);
     75 }
     76 
     77 /*
     78  * Return the next word in the compressed dictionary in 'buffer' or
     79  * NULL on end-of-file
     80  * Also set clen to the length of the compressed word (for mkindex) and
     81  * rlen to the strlen() of the real word
     82  */
     83 char *
     84 nextword(fp, buffer, clen, rlen)
     85 	FILE *fp;
     86 	char *buffer;
     87 	int *clen, *rlen;
     88 {
     89 	register int ch, pcount;
     90 	register char *p, *q;
     91 	static char buf[MAXWORDLEN + 1];
     92 	static int first = 1;
     93 	static int lastch = 0;
     94 
     95    	if (first) {
     96 		if ((pcount = getc(fp)) == EOF)
     97 			return (NULL);
     98 		first = 0;
     99 	}
    100 	else if ((pcount = lastch) == EOF)
    101 		return (NULL);
    102 
    103 	p = buf + (*clen = pcount);
    104 
    105 	while ((ch = getc(fp)) != EOF && ch >= 'a')
    106 			*p++ = ch;
    107 		lastch = ch;
    108 	*p = '\0';
    109 
    110 	*rlen = (int) (p - buf);
    111 	*clen = *rlen - *clen;
    112 
    113 	p = buf;
    114 	q = buffer;
    115 	while ((*q++ = *p) != '\0') {
    116 		if (*p++ == 'q')
    117 			*q++ = 'u';
    118 	}
    119 	return (buffer);
    120 }
    121