bm.c revision 1.6 1 1.6 jtc /* $NetBSD: bm.c,v 1.6 1997/07/21 14:09:07 jtc Exp $ */
2 1.5 christos
3 1.1 cgd /*-
4 1.1 cgd * Copyright (c) 1994
5 1.1 cgd * The Regents of the University of California. All rights reserved.
6 1.1 cgd *
7 1.1 cgd * This code is derived from software contributed to Berkeley by
8 1.1 cgd * Andrew Hume of AT&T Bell Laboratories.
9 1.1 cgd *
10 1.1 cgd * Redistribution and use in source and binary forms, with or without
11 1.1 cgd * modification, are permitted provided that the following conditions
12 1.1 cgd * are met:
13 1.1 cgd * 1. Redistributions of source code must retain the above copyright
14 1.1 cgd * notice, this list of conditions and the following disclaimer.
15 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright
16 1.1 cgd * notice, this list of conditions and the following disclaimer in the
17 1.1 cgd * documentation and/or other materials provided with the distribution.
18 1.1 cgd * 3. All advertising materials mentioning features or use of this software
19 1.1 cgd * must display the following acknowledgement:
20 1.1 cgd * This product includes software developed by the University of
21 1.1 cgd * California, Berkeley and its contributors.
22 1.1 cgd * 4. Neither the name of the University nor the names of its contributors
23 1.1 cgd * may be used to endorse or promote products derived from this software
24 1.1 cgd * without specific prior written permission.
25 1.1 cgd *
26 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 1.1 cgd * SUCH DAMAGE.
37 1.1 cgd */
38 1.1 cgd
39 1.5 christos #include <sys/cdefs.h>
40 1.4 jtc #if defined(LIBC_SCCS) && !defined(lint)
41 1.5 christos #if 0
42 1.5 christos static char sccsid[] = "@(#)bm.c 8.7 (Berkeley) 6/21/94";
43 1.5 christos #else
44 1.6 jtc __RCSID("$NetBSD: bm.c,v 1.6 1997/07/21 14:09:07 jtc Exp $");
45 1.5 christos #endif
46 1.4 jtc #endif /* LIBC_SCCS && not lint */
47 1.1 cgd
48 1.6 jtc #include "namespace.h"
49 1.1 cgd #include <sys/types.h>
50 1.1 cgd
51 1.1 cgd #include <bm.h>
52 1.1 cgd #include <errno.h>
53 1.1 cgd #include <stdlib.h>
54 1.1 cgd #include <string.h>
55 1.6 jtc
56 1.6 jtc #ifdef __weak_alias
57 1.6 jtc __weak_alias(bm_comp,_bm_comp);
58 1.6 jtc __weak_alias(bm_exec,_bm_exec);
59 1.6 jtc __weak_alias(bm_free,_bm_free);
60 1.6 jtc #endif
61 1.1 cgd
62 1.1 cgd /*
63 1.1 cgd * XXX
64 1.1 cgd * The default frequency table starts at 99 and counts down. The default
65 1.1 cgd * table should probably be oriented toward text, and will necessarily be
66 1.1 cgd * locale specific. This one is for English. It was derived from the
67 1.1 cgd * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb
68 1.1 cgd * of email and random text. Change it if you can find something better.
69 1.1 cgd */
70 1.1 cgd static u_char const freq_def[256] = {
71 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
72 1.1 cgd 0, 77, 90, 0, 0, 0, 0, 0,
73 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
74 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
75 1.1 cgd 99, 28, 42, 27, 16, 14, 20, 51,
76 1.1 cgd 66, 65, 59, 24, 75, 76, 84, 56,
77 1.1 cgd 72, 74, 64, 55, 54, 47, 41, 37,
78 1.1 cgd 44, 61, 70, 43, 23, 53, 49, 22,
79 1.1 cgd 33, 58, 40, 46, 45, 57, 60, 26,
80 1.1 cgd 30, 63, 21, 12, 32, 50, 38, 39,
81 1.1 cgd 34, 11, 48, 67, 62, 35, 15, 29,
82 1.1 cgd 71, 18, 9, 17, 25, 13, 10, 52,
83 1.1 cgd 36, 95, 78, 86, 87, 98, 82, 80,
84 1.1 cgd 88, 94, 19, 68, 89, 83, 93, 96,
85 1.1 cgd 81, 7, 91, 92, 97, 85, 69, 73,
86 1.1 cgd 31, 79, 8, 5, 4, 6, 3, 0,
87 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
88 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
89 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
90 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
91 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
92 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
93 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
94 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
95 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
96 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
97 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
98 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
99 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
100 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
101 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
102 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0,
103 1.1 cgd };
104 1.1 cgd
105 1.1 cgd bm_pat *
106 1.1 cgd bm_comp(pb, len, freq)
107 1.1 cgd u_char const *pb;
108 1.1 cgd size_t len;
109 1.1 cgd u_char const *freq;
110 1.1 cgd {
111 1.1 cgd register u_char const *pe, *p;
112 1.1 cgd register size_t *d, r;
113 1.1 cgd register int j;
114 1.1 cgd int sv_errno;
115 1.1 cgd bm_pat *pat;
116 1.1 cgd
117 1.1 cgd if (len == 0) {
118 1.1 cgd errno = EINVAL;
119 1.1 cgd return (NULL);
120 1.1 cgd }
121 1.1 cgd if ((pat = malloc(sizeof(*pat))) == NULL)
122 1.1 cgd return (NULL);
123 1.1 cgd pat->pat = NULL;
124 1.1 cgd pat->delta = NULL;
125 1.1 cgd
126 1.1 cgd pat->patlen = len; /* copy pattern */
127 1.1 cgd if ((pat->pat = malloc(pat->patlen)) == NULL)
128 1.1 cgd goto mem;
129 1.1 cgd memcpy(pat->pat, pb, pat->patlen);
130 1.1 cgd /* get skip delta */
131 1.1 cgd if ((pat->delta = malloc(256 * sizeof(*d))) == NULL)
132 1.1 cgd goto mem;
133 1.1 cgd for (j = 0, d = pat->delta; j < 256; j++)
134 1.1 cgd d[j] = pat->patlen;
135 1.1 cgd for (pe = pb + pat->patlen - 1; pb <= pe; pb++)
136 1.1 cgd d[*pb] = pe - pb;
137 1.1 cgd
138 1.1 cgd if (freq == NULL) /* default freq table */
139 1.1 cgd freq = freq_def;
140 1.1 cgd r = 0; /* get guard */
141 1.1 cgd for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++)
142 1.1 cgd if (freq[*pb] < freq[pat->pat[r]])
143 1.1 cgd r = pb - pat->pat;
144 1.1 cgd pat->rarec = pat->pat[r];
145 1.1 cgd pat->rareoff = r - (pat->patlen - 1);
146 1.1 cgd
147 1.1 cgd /* get md2 shift */
148 1.1 cgd for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--)
149 1.1 cgd if (*p == *pe)
150 1.1 cgd break;
151 1.1 cgd
152 1.1 cgd /* *p is first leftward reoccurrence of *pe */
153 1.1 cgd pat->md2 = pe - p;
154 1.1 cgd return (pat);
155 1.1 cgd
156 1.1 cgd mem: sv_errno = errno;
157 1.1 cgd bm_free(pat);
158 1.1 cgd errno = sv_errno;
159 1.1 cgd return (NULL);
160 1.1 cgd }
161 1.1 cgd
162 1.1 cgd void
163 1.1 cgd bm_free(pat)
164 1.1 cgd bm_pat *pat;
165 1.1 cgd {
166 1.1 cgd if (pat->pat != NULL)
167 1.1 cgd free(pat->pat);
168 1.1 cgd if (pat->delta != NULL)
169 1.1 cgd free(pat->delta);
170 1.1 cgd free(pat);
171 1.1 cgd }
172 1.1 cgd
173 1.1 cgd u_char *
174 1.1 cgd bm_exec(pat, base, n)
175 1.1 cgd bm_pat *pat;
176 1.1 cgd u_char *base;
177 1.1 cgd size_t n;
178 1.1 cgd {
179 1.1 cgd register u_char *e, *ep, *p, *q, *s;
180 1.1 cgd register size_t *d0, k, md2, n1, ro;
181 1.1 cgd register int rc;
182 1.1 cgd
183 1.1 cgd if (n == 0)
184 1.1 cgd return (NULL);
185 1.1 cgd
186 1.1 cgd d0 = pat->delta;
187 1.1 cgd n1 = pat->patlen - 1;
188 1.1 cgd md2 = pat->md2;
189 1.1 cgd ro = pat->rareoff;
190 1.1 cgd rc = pat->rarec;
191 1.1 cgd ep = pat->pat + pat->patlen - 1;
192 1.1 cgd s = base + (pat->patlen - 1);
193 1.1 cgd
194 1.1 cgd /* fast loop up to n - 3 * patlen */
195 1.1 cgd e = base + n - 3 * pat->patlen;
196 1.1 cgd while (s < e) {
197 1.1 cgd k = d0[*s]; /* ufast skip loop */
198 1.1 cgd while (k) {
199 1.1 cgd k = d0[*(s += k)];
200 1.1 cgd k = d0[*(s += k)];
201 1.1 cgd }
202 1.1 cgd if (s >= e)
203 1.1 cgd break;
204 1.1 cgd if (s[ro] != rc) /* guard test */
205 1.1 cgd goto mismatch1;
206 1.1 cgd /* fwd match */
207 1.1 cgd for (p = pat->pat, q = s - n1; p < ep;)
208 1.1 cgd if (*q++ != *p++)
209 1.1 cgd goto mismatch1;
210 1.1 cgd return (s - n1);
211 1.1 cgd
212 1.1 cgd mismatch1: s += md2; /* md2 shift */
213 1.1 cgd }
214 1.1 cgd
215 1.1 cgd /* slow loop up to end */
216 1.1 cgd e = base + n;
217 1.1 cgd while (s < e) {
218 1.1 cgd s += d0[*s]; /* step */
219 1.1 cgd if (s >= e)
220 1.1 cgd break;
221 1.1 cgd if (s[ro] != rc) /* guard test */
222 1.1 cgd goto mismatch2;
223 1.1 cgd /* fwd match */
224 1.3 cgd for (p = pat->pat, q = s - n1; p <= ep;)
225 1.1 cgd if (*q++ != *p++)
226 1.1 cgd goto mismatch2;
227 1.1 cgd return (s - n1);
228 1.1 cgd
229 1.1 cgd mismatch2: s += md2; /* md2 shift */
230 1.1 cgd }
231 1.1 cgd
232 1.1 cgd return (NULL);
233 1.1 cgd }
234