bm.c revision 1.8 1 /* $NetBSD: bm.c,v 1.8 1999/09/16 11:45:38 lukem Exp $ */
2
3 /*-
4 * Copyright (c) 1994
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Andrew Hume of AT&T Bell Laboratories.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 */
38
39 #include <sys/cdefs.h>
40 #if defined(LIBC_SCCS) && !defined(lint)
41 #if 0
42 static char sccsid[] = "@(#)bm.c 8.7 (Berkeley) 6/21/94";
43 #else
44 __RCSID("$NetBSD: bm.c,v 1.8 1999/09/16 11:45:38 lukem Exp $");
45 #endif
46 #endif /* LIBC_SCCS && not lint */
47
48 #include "namespace.h"
49 #include <sys/types.h>
50
51 #include <assert.h>
52 #include <bm.h>
53 #include <errno.h>
54 #include <stdlib.h>
55 #include <string.h>
56
57 #ifdef __weak_alias
58 __weak_alias(bm_comp,_bm_comp);
59 __weak_alias(bm_exec,_bm_exec);
60 __weak_alias(bm_free,_bm_free);
61 #endif
62
63 /*
64 * XXX
65 * The default frequency table starts at 99 and counts down. The default
66 * table should probably be oriented toward text, and will necessarily be
67 * locale specific. This one is for English. It was derived from the
68 * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb
69 * of email and random text. Change it if you can find something better.
70 */
71 static u_char const freq_def[256] = {
72 0, 0, 0, 0, 0, 0, 0, 0,
73 0, 77, 90, 0, 0, 0, 0, 0,
74 0, 0, 0, 0, 0, 0, 0, 0,
75 0, 0, 0, 0, 0, 0, 0, 0,
76 99, 28, 42, 27, 16, 14, 20, 51,
77 66, 65, 59, 24, 75, 76, 84, 56,
78 72, 74, 64, 55, 54, 47, 41, 37,
79 44, 61, 70, 43, 23, 53, 49, 22,
80 33, 58, 40, 46, 45, 57, 60, 26,
81 30, 63, 21, 12, 32, 50, 38, 39,
82 34, 11, 48, 67, 62, 35, 15, 29,
83 71, 18, 9, 17, 25, 13, 10, 52,
84 36, 95, 78, 86, 87, 98, 82, 80,
85 88, 94, 19, 68, 89, 83, 93, 96,
86 81, 7, 91, 92, 97, 85, 69, 73,
87 31, 79, 8, 5, 4, 6, 3, 0,
88 0, 0, 0, 0, 0, 0, 0, 0,
89 0, 0, 0, 0, 0, 0, 0, 0,
90 0, 0, 0, 0, 0, 0, 0, 0,
91 0, 0, 0, 0, 0, 0, 0, 0,
92 0, 0, 0, 0, 0, 0, 0, 0,
93 0, 0, 0, 0, 0, 0, 0, 0,
94 0, 0, 0, 0, 0, 0, 0, 0,
95 0, 0, 0, 0, 0, 0, 0, 0,
96 0, 0, 0, 0, 0, 0, 0, 0,
97 0, 0, 0, 0, 0, 0, 0, 0,
98 0, 0, 0, 0, 0, 0, 0, 0,
99 0, 0, 0, 0, 0, 0, 0, 0,
100 0, 0, 0, 0, 0, 0, 0, 0,
101 0, 0, 0, 0, 0, 0, 0, 0,
102 0, 0, 0, 0, 0, 0, 0, 0,
103 0, 0, 0, 0, 0, 0, 0, 0,
104 };
105
106 bm_pat *
107 bm_comp(pb, len, freq)
108 u_char const *pb;
109 size_t len;
110 u_char const *freq;
111 {
112 u_char const *pe, *p;
113 size_t *d, r;
114 int j;
115 int sv_errno;
116 bm_pat *pat;
117
118 _DIAGASSERT(pb != NULL);
119 /* freq may be NULL */
120 #ifdef _DIAGNOSTIC
121 if (pb == NULL) {
122 errno = EFAULT;
123 return (NULL);
124 }
125 #endif
126
127 if (len == 0) {
128 errno = EINVAL;
129 return (NULL);
130 }
131 if ((pat = malloc(sizeof(*pat))) == NULL)
132 return (NULL);
133 pat->pat = NULL;
134 pat->delta = NULL;
135
136 pat->patlen = len; /* copy pattern */
137 if ((pat->pat = malloc(pat->patlen)) == NULL)
138 goto mem;
139 memcpy(pat->pat, pb, pat->patlen);
140 /* get skip delta */
141 if ((pat->delta = malloc(256 * sizeof(*d))) == NULL)
142 goto mem;
143 for (j = 0, d = pat->delta; j < 256; j++)
144 d[j] = pat->patlen;
145 for (pe = pb + pat->patlen - 1; pb <= pe; pb++)
146 d[*pb] = pe - pb;
147
148 if (freq == NULL) /* default freq table */
149 freq = freq_def;
150 r = 0; /* get guard */
151 for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++)
152 if (freq[*pb] < freq[pat->pat[r]])
153 r = pb - pat->pat;
154 pat->rarec = pat->pat[r];
155 pat->rareoff = r - (pat->patlen - 1);
156
157 /* get md2 shift */
158 for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--)
159 if (*p == *pe)
160 break;
161
162 /* *p is first leftward reoccurrence of *pe */
163 pat->md2 = pe - p;
164 return (pat);
165
166 mem: sv_errno = errno;
167 bm_free(pat);
168 errno = sv_errno;
169 return (NULL);
170 }
171
172 void
173 bm_free(pat)
174 bm_pat *pat;
175 {
176
177 _DIAGASSERT(pat != NULL);
178 #ifdef _DIAGNOSTIC
179 if (pat == NULL)
180 return;
181 #endif
182
183 if (pat->pat != NULL)
184 free(pat->pat);
185 if (pat->delta != NULL)
186 free(pat->delta);
187 free(pat);
188 }
189
190 u_char *
191 bm_exec(pat, base, n)
192 bm_pat *pat;
193 u_char *base;
194 size_t n;
195 {
196 u_char *e, *ep, *p, *q, *s;
197 size_t *d0, k, md2, n1, ro;
198 int rc;
199
200 _DIAGASSERT(pat != NULL);
201 _DIAGASSERT(base != NULL);
202 #ifdef _DIAGNOSTIC
203 if (pat == NULL || base == NULL)
204 return (NULL);
205 #endif
206
207 if (n == 0)
208 return (NULL);
209
210 d0 = pat->delta;
211 n1 = pat->patlen - 1;
212 md2 = pat->md2;
213 ro = pat->rareoff;
214 rc = pat->rarec;
215 ep = pat->pat + pat->patlen - 1;
216 s = base + (pat->patlen - 1);
217
218 /* fast loop up to n - 3 * patlen */
219 e = base + n - 3 * pat->patlen;
220 while (s < e) {
221 k = d0[*s]; /* ufast skip loop */
222 while (k) {
223 k = d0[*(s += k)];
224 k = d0[*(s += k)];
225 }
226 if (s >= e)
227 break;
228 if (s[ro] != rc) /* guard test */
229 goto mismatch1;
230 /* fwd match */
231 for (p = pat->pat, q = s - n1; p < ep;)
232 if (*q++ != *p++)
233 goto mismatch1;
234 return (s - n1);
235
236 mismatch1: s += md2; /* md2 shift */
237 }
238
239 /* slow loop up to end */
240 e = base + n;
241 while (s < e) {
242 s += d0[*s]; /* step */
243 if (s >= e)
244 break;
245 if (s[ro] != rc) /* guard test */
246 goto mismatch2;
247 /* fwd match */
248 for (p = pat->pat, q = s - n1; p <= ep;)
249 if (*q++ != *p++)
250 goto mismatch2;
251 return (s - n1);
252
253 mismatch2: s += md2; /* md2 shift */
254 }
255
256 return (NULL);
257 }
258