bm.c revision 1.10 1 /* $NetBSD: bm.c,v 1.10 2000/01/22 22:19:20 mycroft Exp $ */
2
3 /*-
4 * Copyright (c) 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Andrew Hume of AT&T Bell Laboratories.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #if defined(LIBC_SCCS) && !defined(lint)
41 #if 0
42 static char sccsid[] = "@(#)bm.c 8.7 (Berkeley) 6/21/94";
43 #else
44 __RCSID("$NetBSD: bm.c,v 1.10 2000/01/22 22:19:20 mycroft Exp $");
45 #endif
46 #endif /* LIBC_SCCS && not lint */
47
48 #include "namespace.h"
49 #include <sys/types.h>
50
51 #include <assert.h>
52 #include <bm.h>
53 #include <errno.h>
54 #include <stdlib.h>
55 #include <string.h>
56
57 #ifdef __weak_alias
58 __weak_alias(bm_comp,_bm_comp)
59 __weak_alias(bm_exec,_bm_exec)
60 __weak_alias(bm_free,_bm_free)
61 #endif
62
63 /*
64 * XXX
65 * The default frequency table starts at 99 and counts down. The default
66 * table should probably be oriented toward text, and will necessarily be
67 * locale specific. This one is for English. It was derived from the
68 * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb
69 * of email and random text. Change it if you can find something better.
70 */
71 static u_char const freq_def[256] = {
72 0, 0, 0, 0, 0, 0, 0, 0,
73 0, 77, 90, 0, 0, 0, 0, 0,
74 0, 0, 0, 0, 0, 0, 0, 0,
75 0, 0, 0, 0, 0, 0, 0, 0,
76 99, 28, 42, 27, 16, 14, 20, 51,
77 66, 65, 59, 24, 75, 76, 84, 56,
78 72, 74, 64, 55, 54, 47, 41, 37,
79 44, 61, 70, 43, 23, 53, 49, 22,
80 33, 58, 40, 46, 45, 57, 60, 26,
81 30, 63, 21, 12, 32, 50, 38, 39,
82 34, 11, 48, 67, 62, 35, 15, 29,
83 71, 18, 9, 17, 25, 13, 10, 52,
84 36, 95, 78, 86, 87, 98, 82, 80,
85 88, 94, 19, 68, 89, 83, 93, 96,
86 81, 7, 91, 92, 97, 85, 69, 73,
87 31, 79, 8, 5, 4, 6, 3, 0,
88 0, 0, 0, 0, 0, 0, 0, 0,
89 0, 0, 0, 0, 0, 0, 0, 0,
90 0, 0, 0, 0, 0, 0, 0, 0,
91 0, 0, 0, 0, 0, 0, 0, 0,
92 0, 0, 0, 0, 0, 0, 0, 0,
93 0, 0, 0, 0, 0, 0, 0, 0,
94 0, 0, 0, 0, 0, 0, 0, 0,
95 0, 0, 0, 0, 0, 0, 0, 0,
96 0, 0, 0, 0, 0, 0, 0, 0,
97 0, 0, 0, 0, 0, 0, 0, 0,
98 0, 0, 0, 0, 0, 0, 0, 0,
99 0, 0, 0, 0, 0, 0, 0, 0,
100 0, 0, 0, 0, 0, 0, 0, 0,
101 0, 0, 0, 0, 0, 0, 0, 0,
102 0, 0, 0, 0, 0, 0, 0, 0,
103 0, 0, 0, 0, 0, 0, 0, 0,
104 };
105
106 bm_pat *
107 bm_comp(pb, len, freq)
108 u_char const *pb;
109 size_t len;
110 u_char const *freq;
111 {
112 u_char const *pe, *p;
113 size_t *d, r;
114 int j;
115 int sv_errno;
116 bm_pat *pat;
117
118 _DIAGASSERT(pb != NULL);
119 /* freq may be NULL */
120
121 if (len == 0) {
122 errno = EINVAL;
123 return (NULL);
124 }
125 if ((pat = malloc(sizeof(*pat))) == NULL)
126 return (NULL);
127 pat->pat = NULL;
128 pat->delta = NULL;
129
130 pat->patlen = len; /* copy pattern */
131 if ((pat->pat = malloc(pat->patlen)) == NULL)
132 goto mem;
133 memcpy(pat->pat, pb, pat->patlen);
134 /* get skip delta */
135 if ((pat->delta = malloc(256 * sizeof(*d))) == NULL)
136 goto mem;
137 for (j = 0, d = pat->delta; j < 256; j++)
138 d[j] = pat->patlen;
139 for (pe = pb + pat->patlen - 1; pb <= pe; pb++)
140 d[*pb] = pe - pb;
141
142 if (freq == NULL) /* default freq table */
143 freq = freq_def;
144 r = 0; /* get guard */
145 for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++)
146 if (freq[*pb] < freq[pat->pat[r]])
147 r = pb - pat->pat;
148 pat->rarec = pat->pat[r];
149 pat->rareoff = r - (pat->patlen - 1);
150
151 /* get md2 shift */
152 for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--)
153 if (*p == *pe)
154 break;
155
156 /* *p is first leftward reoccurrence of *pe */
157 pat->md2 = pe - p;
158 return (pat);
159
160 mem: sv_errno = errno;
161 bm_free(pat);
162 errno = sv_errno;
163 return (NULL);
164 }
165
166 void
167 bm_free(pat)
168 bm_pat *pat;
169 {
170
171 _DIAGASSERT(pat != NULL);
172
173 if (pat->pat != NULL)
174 free(pat->pat);
175 if (pat->delta != NULL)
176 free(pat->delta);
177 free(pat);
178 }
179
180 u_char *
181 bm_exec(pat, base, n)
182 bm_pat *pat;
183 u_char *base;
184 size_t n;
185 {
186 u_char *e, *ep, *p, *q, *s;
187 size_t *d0, k, md2, n1, ro;
188 int rc;
189
190 _DIAGASSERT(pat != NULL);
191 _DIAGASSERT(base != NULL);
192
193 if (n == 0)
194 return (NULL);
195
196 d0 = pat->delta;
197 n1 = pat->patlen - 1;
198 md2 = pat->md2;
199 ro = pat->rareoff;
200 rc = pat->rarec;
201 ep = pat->pat + pat->patlen - 1;
202 s = base + (pat->patlen - 1);
203
204 /* fast loop up to n - 3 * patlen */
205 e = base + n - 3 * pat->patlen;
206 while (s < e) {
207 k = d0[*s]; /* ufast skip loop */
208 while (k) {
209 k = d0[*(s += k)];
210 k = d0[*(s += k)];
211 }
212 if (s >= e)
213 break;
214 if (s[ro] != rc) /* guard test */
215 goto mismatch1;
216 /* fwd match */
217 for (p = pat->pat, q = s - n1; p < ep;)
218 if (*q++ != *p++)
219 goto mismatch1;
220 return (s - n1);
221
222 mismatch1: s += md2; /* md2 shift */
223 }
224
225 /* slow loop up to end */
226 e = base + n;
227 while (s < e) {
228 s += d0[*s]; /* step */
229 if (s >= e)
230 break;
231 if (s[ro] != rc) /* guard test */
232 goto mismatch2;
233 /* fwd match */
234 for (p = pat->pat, q = s - n1; p <= ep;)
235 if (*q++ != *p++)
236 goto mismatch2;
237 return (s - n1);
238
239 mismatch2: s += md2; /* md2 shift */
240 }
241
242 return (NULL);
243 }
244