Home | History | Annotate | Line # | Download | only in string
bm.c revision 1.11
      1  1.11       agc /*	$NetBSD: bm.c,v 1.11 2003/08/07 16:43:47 agc Exp $	*/
      2   1.5  christos 
      3   1.1       cgd /*-
      4   1.1       cgd  * Copyright (c) 1994
      5   1.1       cgd  *	The Regents of the University of California.  All rights reserved.
      6   1.1       cgd  *
      7   1.1       cgd  * This code is derived from software contributed to Berkeley by
      8   1.1       cgd  * Andrew Hume of AT&T Bell Laboratories.
      9   1.1       cgd  *
     10   1.1       cgd  * Redistribution and use in source and binary forms, with or without
     11   1.1       cgd  * modification, are permitted provided that the following conditions
     12   1.1       cgd  * are met:
     13   1.1       cgd  * 1. Redistributions of source code must retain the above copyright
     14   1.1       cgd  *    notice, this list of conditions and the following disclaimer.
     15   1.1       cgd  * 2. Redistributions in binary form must reproduce the above copyright
     16   1.1       cgd  *    notice, this list of conditions and the following disclaimer in the
     17   1.1       cgd  *    documentation and/or other materials provided with the distribution.
     18  1.11       agc  * 3. Neither the name of the University nor the names of its contributors
     19   1.1       cgd  *    may be used to endorse or promote products derived from this software
     20   1.1       cgd  *    without specific prior written permission.
     21   1.1       cgd  *
     22   1.1       cgd  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     23   1.1       cgd  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     24   1.1       cgd  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     25   1.1       cgd  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     26   1.1       cgd  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     27   1.1       cgd  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     28   1.1       cgd  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     29   1.1       cgd  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     30   1.1       cgd  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     31   1.1       cgd  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     32   1.1       cgd  * SUCH DAMAGE.
     33   1.1       cgd  */
     34   1.1       cgd 
     35   1.5  christos #include <sys/cdefs.h>
     36   1.4       jtc #if defined(LIBC_SCCS) && !defined(lint)
     37   1.5  christos #if 0
     38   1.5  christos static char sccsid[] = "@(#)bm.c	8.7 (Berkeley) 6/21/94";
     39   1.5  christos #else
     40  1.11       agc __RCSID("$NetBSD: bm.c,v 1.11 2003/08/07 16:43:47 agc Exp $");
     41   1.5  christos #endif
     42   1.4       jtc #endif /* LIBC_SCCS && not lint */
     43   1.1       cgd 
     44   1.6       jtc #include "namespace.h"
     45   1.1       cgd #include <sys/types.h>
     46   1.1       cgd 
     47   1.8     lukem #include <assert.h>
     48   1.1       cgd #include <bm.h>
     49   1.1       cgd #include <errno.h>
     50   1.1       cgd #include <stdlib.h>
     51   1.1       cgd #include <string.h>
     52   1.6       jtc 
     53   1.6       jtc #ifdef __weak_alias
     54  1.10   mycroft __weak_alias(bm_comp,_bm_comp)
     55  1.10   mycroft __weak_alias(bm_exec,_bm_exec)
     56  1.10   mycroft __weak_alias(bm_free,_bm_free)
     57   1.6       jtc #endif
     58   1.1       cgd 
     59   1.1       cgd /*
     60   1.1       cgd  * XXX
     61   1.1       cgd  * The default frequency table starts at 99 and counts down.  The default
     62   1.1       cgd  * table should probably be oriented toward text, and will necessarily be
     63   1.1       cgd  * locale specific.  This one is for English.  It was derived from the
     64   1.1       cgd  * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb
     65   1.1       cgd  * of email and random text.  Change it if you can find something better.
     66   1.1       cgd  */
     67   1.1       cgd static u_char const freq_def[256] = {
     68   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     69   1.1       cgd 	 0, 77, 90,  0,  0,  0,  0,  0,
     70   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     71   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     72   1.1       cgd 	99, 28, 42, 27, 16, 14, 20, 51,
     73   1.1       cgd 	66, 65, 59, 24, 75, 76, 84, 56,
     74   1.1       cgd 	72, 74, 64, 55, 54, 47, 41, 37,
     75   1.1       cgd 	44, 61, 70, 43, 23, 53, 49, 22,
     76   1.1       cgd 	33, 58, 40, 46, 45, 57, 60, 26,
     77   1.1       cgd 	30, 63, 21, 12, 32, 50, 38, 39,
     78   1.1       cgd 	34, 11, 48, 67, 62, 35, 15, 29,
     79   1.1       cgd 	71, 18,  9, 17, 25, 13, 10, 52,
     80   1.1       cgd 	36, 95, 78, 86, 87, 98, 82, 80,
     81   1.1       cgd 	88, 94, 19, 68, 89, 83, 93, 96,
     82   1.1       cgd 	81,  7, 91, 92, 97, 85, 69, 73,
     83   1.1       cgd 	31, 79,  8,  5,  4,  6,  3,  0,
     84   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     85   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     86   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     87   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     88   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     89   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     90   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     91   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     92   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     93   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     94   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     95   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     96   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     97   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     98   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
     99   1.1       cgd 	 0,  0,  0,  0,  0,  0,  0,  0,
    100   1.1       cgd };
    101   1.1       cgd 
    102   1.1       cgd bm_pat *
    103   1.1       cgd bm_comp(pb, len, freq)
    104   1.1       cgd 	u_char const *pb;
    105   1.1       cgd 	size_t len;
    106   1.1       cgd 	u_char const *freq;
    107   1.1       cgd {
    108   1.7     perry 	u_char const *pe, *p;
    109   1.7     perry 	size_t *d, r;
    110   1.7     perry 	int j;
    111   1.1       cgd 	int sv_errno;
    112   1.1       cgd 	bm_pat *pat;
    113   1.1       cgd 
    114   1.8     lukem 	_DIAGASSERT(pb != NULL);
    115   1.8     lukem 	/* freq may be NULL */
    116   1.8     lukem 
    117   1.1       cgd 	if (len == 0) {
    118   1.1       cgd 		errno = EINVAL;
    119   1.1       cgd 		return (NULL);
    120   1.1       cgd 	}
    121   1.1       cgd 	if ((pat = malloc(sizeof(*pat))) == NULL)
    122   1.1       cgd 		return (NULL);
    123   1.1       cgd 	pat->pat = NULL;
    124   1.1       cgd 	pat->delta = NULL;
    125   1.1       cgd 
    126   1.1       cgd 	pat->patlen = len;			/* copy pattern */
    127   1.1       cgd 	if ((pat->pat = malloc(pat->patlen)) == NULL)
    128   1.1       cgd 		goto mem;
    129   1.1       cgd 	memcpy(pat->pat, pb, pat->patlen);
    130   1.1       cgd 						/* get skip delta */
    131   1.1       cgd 	if ((pat->delta = malloc(256 * sizeof(*d))) == NULL)
    132   1.1       cgd 		goto mem;
    133   1.1       cgd 	for (j = 0, d = pat->delta; j < 256; j++)
    134   1.1       cgd 		d[j] = pat->patlen;
    135   1.1       cgd 	for (pe = pb + pat->patlen - 1; pb <= pe; pb++)
    136   1.1       cgd 		d[*pb] = pe - pb;
    137   1.1       cgd 
    138   1.1       cgd 	if (freq == NULL)			/* default freq table */
    139   1.1       cgd 		freq = freq_def;
    140   1.1       cgd 	r = 0;					/* get guard */
    141   1.1       cgd 	for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++)
    142   1.1       cgd 		if (freq[*pb] < freq[pat->pat[r]])
    143   1.1       cgd 			r = pb - pat->pat;
    144   1.1       cgd 	pat->rarec = pat->pat[r];
    145   1.1       cgd 	pat->rareoff = r - (pat->patlen - 1);
    146   1.1       cgd 
    147   1.1       cgd 						/* get md2 shift */
    148   1.1       cgd 	for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--)
    149   1.1       cgd 		if (*p == *pe)
    150   1.1       cgd 			break;
    151   1.1       cgd 
    152   1.1       cgd 	/* *p is first leftward reoccurrence of *pe */
    153   1.1       cgd 	pat->md2 = pe - p;
    154   1.1       cgd 	return (pat);
    155   1.1       cgd 
    156   1.1       cgd mem:	sv_errno = errno;
    157   1.1       cgd 	bm_free(pat);
    158   1.1       cgd 	errno = sv_errno;
    159   1.1       cgd 	return (NULL);
    160   1.1       cgd }
    161   1.1       cgd 
    162   1.1       cgd void
    163   1.1       cgd bm_free(pat)
    164   1.1       cgd 	bm_pat *pat;
    165   1.1       cgd {
    166   1.8     lukem 
    167   1.8     lukem 	_DIAGASSERT(pat != NULL);
    168   1.8     lukem 
    169   1.1       cgd 	if (pat->pat != NULL)
    170   1.1       cgd 		free(pat->pat);
    171   1.1       cgd 	if (pat->delta != NULL)
    172   1.1       cgd 		free(pat->delta);
    173   1.1       cgd 	free(pat);
    174   1.1       cgd }
    175   1.1       cgd 
    176   1.1       cgd u_char *
    177   1.1       cgd bm_exec(pat, base, n)
    178   1.1       cgd 	bm_pat *pat;
    179   1.1       cgd 	u_char *base;
    180   1.1       cgd 	size_t n;
    181   1.1       cgd {
    182   1.7     perry 	u_char *e, *ep, *p, *q, *s;
    183   1.7     perry 	size_t *d0, k, md2, n1, ro;
    184   1.7     perry 	int rc;
    185   1.8     lukem 
    186   1.8     lukem 	_DIAGASSERT(pat != NULL);
    187   1.8     lukem 	_DIAGASSERT(base != NULL);
    188   1.1       cgd 
    189   1.1       cgd 	if (n == 0)
    190   1.1       cgd 		return (NULL);
    191   1.1       cgd 
    192   1.1       cgd 	d0 = pat->delta;
    193   1.1       cgd 	n1 = pat->patlen - 1;
    194   1.1       cgd 	md2 = pat->md2;
    195   1.1       cgd 	ro = pat->rareoff;
    196   1.1       cgd 	rc = pat->rarec;
    197   1.1       cgd 	ep = pat->pat + pat->patlen - 1;
    198   1.1       cgd 	s = base + (pat->patlen - 1);
    199   1.1       cgd 
    200   1.1       cgd 	/* fast loop up to n - 3 * patlen */
    201   1.1       cgd 	e = base + n - 3 * pat->patlen;
    202   1.1       cgd 	while (s < e) {
    203   1.1       cgd 		k = d0[*s];		/* ufast skip loop */
    204   1.1       cgd 		while (k) {
    205   1.1       cgd 			k = d0[*(s += k)];
    206   1.1       cgd 			k = d0[*(s += k)];
    207   1.1       cgd 		}
    208   1.1       cgd 		if (s >= e)
    209   1.1       cgd 			break;
    210   1.1       cgd 		if (s[ro] != rc)	/* guard test */
    211   1.1       cgd 			goto mismatch1;
    212   1.1       cgd 					/* fwd match */
    213   1.1       cgd 		for (p = pat->pat, q = s - n1; p < ep;)
    214   1.1       cgd 			if (*q++ != *p++)
    215   1.1       cgd 				goto mismatch1;
    216   1.1       cgd 		return (s - n1);
    217   1.1       cgd 
    218   1.1       cgd mismatch1:	s += md2;		/* md2 shift */
    219   1.1       cgd 	}
    220   1.1       cgd 
    221   1.1       cgd 	/* slow loop up to end */
    222   1.1       cgd 	e = base + n;
    223   1.1       cgd 	while (s < e) {
    224   1.1       cgd 		s += d0[*s];		/* step */
    225   1.1       cgd 		if (s >= e)
    226   1.1       cgd 			break;
    227   1.1       cgd 		if (s[ro] != rc)	/* guard test */
    228   1.1       cgd 			goto mismatch2;
    229   1.1       cgd 					/* fwd match */
    230   1.3       cgd 		for (p = pat->pat, q = s - n1; p <= ep;)
    231   1.1       cgd 			if (*q++ != *p++)
    232   1.1       cgd 				goto mismatch2;
    233   1.1       cgd 		return (s - n1);
    234   1.1       cgd 
    235   1.1       cgd mismatch2:	s += md2;		/* md2 shift */
    236   1.1       cgd 	}
    237   1.1       cgd 
    238   1.1       cgd 	return (NULL);
    239   1.1       cgd }
    240