Home | History | Annotate | Line # | Download | only in string
bm.c revision 1.8
      1 /*	$NetBSD: bm.c,v 1.8 1999/09/16 11:45:38 lukem Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1994
      5  *	The Regents of the University of California.  All rights reserved.
      6  *
      7  * This code is derived from software contributed to Berkeley by
      8  * Andrew Hume of AT&T Bell Laboratories.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *	This product includes software developed by the University of
     21  *	California, Berkeley and its contributors.
     22  * 4. Neither the name of the University nor the names of its contributors
     23  *    may be used to endorse or promote products derived from this software
     24  *    without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     36  * SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 #if defined(LIBC_SCCS) && !defined(lint)
     41 #if 0
     42 static char sccsid[] = "@(#)bm.c	8.7 (Berkeley) 6/21/94";
     43 #else
     44 __RCSID("$NetBSD: bm.c,v 1.8 1999/09/16 11:45:38 lukem Exp $");
     45 #endif
     46 #endif /* LIBC_SCCS && not lint */
     47 
     48 #include "namespace.h"
     49 #include <sys/types.h>
     50 
     51 #include <assert.h>
     52 #include <bm.h>
     53 #include <errno.h>
     54 #include <stdlib.h>
     55 #include <string.h>
     56 
     57 #ifdef __weak_alias
     58 __weak_alias(bm_comp,_bm_comp);
     59 __weak_alias(bm_exec,_bm_exec);
     60 __weak_alias(bm_free,_bm_free);
     61 #endif
     62 
     63 /*
     64  * XXX
     65  * The default frequency table starts at 99 and counts down.  The default
     66  * table should probably be oriented toward text, and will necessarily be
     67  * locale specific.  This one is for English.  It was derived from the
     68  * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb
     69  * of email and random text.  Change it if you can find something better.
     70  */
     71 static u_char const freq_def[256] = {
     72 	 0,  0,  0,  0,  0,  0,  0,  0,
     73 	 0, 77, 90,  0,  0,  0,  0,  0,
     74 	 0,  0,  0,  0,  0,  0,  0,  0,
     75 	 0,  0,  0,  0,  0,  0,  0,  0,
     76 	99, 28, 42, 27, 16, 14, 20, 51,
     77 	66, 65, 59, 24, 75, 76, 84, 56,
     78 	72, 74, 64, 55, 54, 47, 41, 37,
     79 	44, 61, 70, 43, 23, 53, 49, 22,
     80 	33, 58, 40, 46, 45, 57, 60, 26,
     81 	30, 63, 21, 12, 32, 50, 38, 39,
     82 	34, 11, 48, 67, 62, 35, 15, 29,
     83 	71, 18,  9, 17, 25, 13, 10, 52,
     84 	36, 95, 78, 86, 87, 98, 82, 80,
     85 	88, 94, 19, 68, 89, 83, 93, 96,
     86 	81,  7, 91, 92, 97, 85, 69, 73,
     87 	31, 79,  8,  5,  4,  6,  3,  0,
     88 	 0,  0,  0,  0,  0,  0,  0,  0,
     89 	 0,  0,  0,  0,  0,  0,  0,  0,
     90 	 0,  0,  0,  0,  0,  0,  0,  0,
     91 	 0,  0,  0,  0,  0,  0,  0,  0,
     92 	 0,  0,  0,  0,  0,  0,  0,  0,
     93 	 0,  0,  0,  0,  0,  0,  0,  0,
     94 	 0,  0,  0,  0,  0,  0,  0,  0,
     95 	 0,  0,  0,  0,  0,  0,  0,  0,
     96 	 0,  0,  0,  0,  0,  0,  0,  0,
     97 	 0,  0,  0,  0,  0,  0,  0,  0,
     98 	 0,  0,  0,  0,  0,  0,  0,  0,
     99 	 0,  0,  0,  0,  0,  0,  0,  0,
    100 	 0,  0,  0,  0,  0,  0,  0,  0,
    101 	 0,  0,  0,  0,  0,  0,  0,  0,
    102 	 0,  0,  0,  0,  0,  0,  0,  0,
    103 	 0,  0,  0,  0,  0,  0,  0,  0,
    104 };
    105 
    106 bm_pat *
    107 bm_comp(pb, len, freq)
    108 	u_char const *pb;
    109 	size_t len;
    110 	u_char const *freq;
    111 {
    112 	u_char const *pe, *p;
    113 	size_t *d, r;
    114 	int j;
    115 	int sv_errno;
    116 	bm_pat *pat;
    117 
    118 	_DIAGASSERT(pb != NULL);
    119 	/* freq may be NULL */
    120 #ifdef _DIAGNOSTIC
    121 	if (pb == NULL) {
    122 		errno = EFAULT;
    123 		return (NULL);
    124 	}
    125 #endif
    126 
    127 	if (len == 0) {
    128 		errno = EINVAL;
    129 		return (NULL);
    130 	}
    131 	if ((pat = malloc(sizeof(*pat))) == NULL)
    132 		return (NULL);
    133 	pat->pat = NULL;
    134 	pat->delta = NULL;
    135 
    136 	pat->patlen = len;			/* copy pattern */
    137 	if ((pat->pat = malloc(pat->patlen)) == NULL)
    138 		goto mem;
    139 	memcpy(pat->pat, pb, pat->patlen);
    140 						/* get skip delta */
    141 	if ((pat->delta = malloc(256 * sizeof(*d))) == NULL)
    142 		goto mem;
    143 	for (j = 0, d = pat->delta; j < 256; j++)
    144 		d[j] = pat->patlen;
    145 	for (pe = pb + pat->patlen - 1; pb <= pe; pb++)
    146 		d[*pb] = pe - pb;
    147 
    148 	if (freq == NULL)			/* default freq table */
    149 		freq = freq_def;
    150 	r = 0;					/* get guard */
    151 	for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++)
    152 		if (freq[*pb] < freq[pat->pat[r]])
    153 			r = pb - pat->pat;
    154 	pat->rarec = pat->pat[r];
    155 	pat->rareoff = r - (pat->patlen - 1);
    156 
    157 						/* get md2 shift */
    158 	for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--)
    159 		if (*p == *pe)
    160 			break;
    161 
    162 	/* *p is first leftward reoccurrence of *pe */
    163 	pat->md2 = pe - p;
    164 	return (pat);
    165 
    166 mem:	sv_errno = errno;
    167 	bm_free(pat);
    168 	errno = sv_errno;
    169 	return (NULL);
    170 }
    171 
    172 void
    173 bm_free(pat)
    174 	bm_pat *pat;
    175 {
    176 
    177 	_DIAGASSERT(pat != NULL);
    178 #ifdef _DIAGNOSTIC
    179 	if (pat == NULL)
    180 		return;
    181 #endif
    182 
    183 	if (pat->pat != NULL)
    184 		free(pat->pat);
    185 	if (pat->delta != NULL)
    186 		free(pat->delta);
    187 	free(pat);
    188 }
    189 
    190 u_char *
    191 bm_exec(pat, base, n)
    192 	bm_pat *pat;
    193 	u_char *base;
    194 	size_t n;
    195 {
    196 	u_char *e, *ep, *p, *q, *s;
    197 	size_t *d0, k, md2, n1, ro;
    198 	int rc;
    199 
    200 	_DIAGASSERT(pat != NULL);
    201 	_DIAGASSERT(base != NULL);
    202 #ifdef _DIAGNOSTIC
    203 	if (pat == NULL || base == NULL)
    204 		return (NULL);
    205 #endif
    206 
    207 	if (n == 0)
    208 		return (NULL);
    209 
    210 	d0 = pat->delta;
    211 	n1 = pat->patlen - 1;
    212 	md2 = pat->md2;
    213 	ro = pat->rareoff;
    214 	rc = pat->rarec;
    215 	ep = pat->pat + pat->patlen - 1;
    216 	s = base + (pat->patlen - 1);
    217 
    218 	/* fast loop up to n - 3 * patlen */
    219 	e = base + n - 3 * pat->patlen;
    220 	while (s < e) {
    221 		k = d0[*s];		/* ufast skip loop */
    222 		while (k) {
    223 			k = d0[*(s += k)];
    224 			k = d0[*(s += k)];
    225 		}
    226 		if (s >= e)
    227 			break;
    228 		if (s[ro] != rc)	/* guard test */
    229 			goto mismatch1;
    230 					/* fwd match */
    231 		for (p = pat->pat, q = s - n1; p < ep;)
    232 			if (*q++ != *p++)
    233 				goto mismatch1;
    234 		return (s - n1);
    235 
    236 mismatch1:	s += md2;		/* md2 shift */
    237 	}
    238 
    239 	/* slow loop up to end */
    240 	e = base + n;
    241 	while (s < e) {
    242 		s += d0[*s];		/* step */
    243 		if (s >= e)
    244 			break;
    245 		if (s[ro] != rc)	/* guard test */
    246 			goto mismatch2;
    247 					/* fwd match */
    248 		for (p = pat->pat, q = s - n1; p <= ep;)
    249 			if (*q++ != *p++)
    250 				goto mismatch2;
    251 		return (s - n1);
    252 
    253 mismatch2:	s += md2;		/* md2 shift */
    254 	}
    255 
    256 	return (NULL);
    257 }
    258