1 1.13 shm /* $NetBSD: bm.c,v 1.13 2014/06/23 10:43:25 shm Exp $ */ 2 1.5 christos 3 1.1 cgd /*- 4 1.1 cgd * Copyright (c) 1994 5 1.1 cgd * The Regents of the University of California. All rights reserved. 6 1.1 cgd * 7 1.1 cgd * This code is derived from software contributed to Berkeley by 8 1.1 cgd * Andrew Hume of AT&T Bell Laboratories. 9 1.1 cgd * 10 1.1 cgd * Redistribution and use in source and binary forms, with or without 11 1.1 cgd * modification, are permitted provided that the following conditions 12 1.1 cgd * are met: 13 1.1 cgd * 1. Redistributions of source code must retain the above copyright 14 1.1 cgd * notice, this list of conditions and the following disclaimer. 15 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 16 1.1 cgd * notice, this list of conditions and the following disclaimer in the 17 1.1 cgd * documentation and/or other materials provided with the distribution. 18 1.11 agc * 3. Neither the name of the University nor the names of its contributors 19 1.1 cgd * may be used to endorse or promote products derived from this software 20 1.1 cgd * without specific prior written permission. 21 1.1 cgd * 22 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 1.1 cgd * SUCH DAMAGE. 33 1.1 cgd */ 34 1.1 cgd 35 1.5 christos #include <sys/cdefs.h> 36 1.4 jtc #if defined(LIBC_SCCS) && !defined(lint) 37 1.5 christos #if 0 38 1.5 christos static char sccsid[] = "@(#)bm.c 8.7 (Berkeley) 6/21/94"; 39 1.5 christos #else 40 1.13 shm __RCSID("$NetBSD: bm.c,v 1.13 2014/06/23 10:43:25 shm Exp $"); 41 1.5 christos #endif 42 1.4 jtc #endif /* LIBC_SCCS && not lint */ 43 1.1 cgd 44 1.6 jtc #include "namespace.h" 45 1.1 cgd #include <sys/types.h> 46 1.1 cgd 47 1.8 lukem #include <assert.h> 48 1.1 cgd #include <bm.h> 49 1.1 cgd #include <errno.h> 50 1.1 cgd #include <stdlib.h> 51 1.1 cgd #include <string.h> 52 1.6 jtc 53 1.6 jtc #ifdef __weak_alias 54 1.10 mycroft __weak_alias(bm_comp,_bm_comp) 55 1.10 mycroft __weak_alias(bm_exec,_bm_exec) 56 1.10 mycroft __weak_alias(bm_free,_bm_free) 57 1.6 jtc #endif 58 1.1 cgd 59 1.1 cgd /* 60 1.1 cgd * XXX 61 1.1 cgd * The default frequency table starts at 99 and counts down. The default 62 1.1 cgd * table should probably be oriented toward text, and will necessarily be 63 1.1 cgd * locale specific. This one is for English. It was derived from the 64 1.1 cgd * OSF/1 and 4.4BSD formatted and unformatted manual pages, and about 100Mb 65 1.1 cgd * of email and random text. Change it if you can find something better. 66 1.1 cgd */ 67 1.1 cgd static u_char const freq_def[256] = { 68 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 69 1.1 cgd 0, 77, 90, 0, 0, 0, 0, 0, 70 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 71 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 72 1.1 cgd 99, 28, 42, 27, 16, 14, 20, 51, 73 1.1 cgd 66, 65, 59, 24, 75, 76, 84, 56, 74 1.1 cgd 72, 74, 64, 55, 54, 47, 41, 37, 75 1.1 cgd 44, 61, 70, 43, 23, 53, 49, 22, 76 1.1 cgd 33, 58, 40, 46, 45, 57, 60, 26, 77 1.1 cgd 30, 63, 21, 12, 32, 50, 38, 39, 78 1.1 cgd 34, 11, 48, 67, 62, 35, 15, 29, 79 1.1 cgd 71, 18, 9, 17, 25, 13, 10, 52, 80 1.1 cgd 36, 95, 78, 86, 87, 98, 82, 80, 81 1.1 cgd 88, 94, 19, 68, 89, 83, 93, 96, 82 1.1 cgd 81, 7, 91, 92, 97, 85, 69, 73, 83 1.1 cgd 31, 79, 8, 5, 4, 6, 3, 0, 84 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 85 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 86 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 87 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 88 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 89 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 90 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 91 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 92 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 93 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 94 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 95 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 96 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 97 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 98 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 99 1.1 cgd 0, 0, 0, 0, 0, 0, 0, 0, 100 1.1 cgd }; 101 1.1 cgd 102 1.1 cgd bm_pat * 103 1.12 abs bm_comp(u_char const *pb, size_t len, u_char const *freq) 104 1.1 cgd { 105 1.7 perry u_char const *pe, *p; 106 1.7 perry size_t *d, r; 107 1.7 perry int j; 108 1.1 cgd int sv_errno; 109 1.1 cgd bm_pat *pat; 110 1.1 cgd 111 1.8 lukem _DIAGASSERT(pb != NULL); 112 1.8 lukem /* freq may be NULL */ 113 1.8 lukem 114 1.1 cgd if (len == 0) { 115 1.1 cgd errno = EINVAL; 116 1.1 cgd return (NULL); 117 1.1 cgd } 118 1.1 cgd if ((pat = malloc(sizeof(*pat))) == NULL) 119 1.1 cgd return (NULL); 120 1.1 cgd pat->pat = NULL; 121 1.1 cgd pat->delta = NULL; 122 1.1 cgd 123 1.1 cgd pat->patlen = len; /* copy pattern */ 124 1.1 cgd if ((pat->pat = malloc(pat->patlen)) == NULL) 125 1.1 cgd goto mem; 126 1.1 cgd memcpy(pat->pat, pb, pat->patlen); 127 1.1 cgd /* get skip delta */ 128 1.1 cgd if ((pat->delta = malloc(256 * sizeof(*d))) == NULL) 129 1.1 cgd goto mem; 130 1.1 cgd for (j = 0, d = pat->delta; j < 256; j++) 131 1.1 cgd d[j] = pat->patlen; 132 1.1 cgd for (pe = pb + pat->patlen - 1; pb <= pe; pb++) 133 1.1 cgd d[*pb] = pe - pb; 134 1.1 cgd 135 1.1 cgd if (freq == NULL) /* default freq table */ 136 1.1 cgd freq = freq_def; 137 1.1 cgd r = 0; /* get guard */ 138 1.1 cgd for (pb = pat->pat, pe = pb + pat->patlen - 1; pb < pe; pb++) 139 1.1 cgd if (freq[*pb] < freq[pat->pat[r]]) 140 1.1 cgd r = pb - pat->pat; 141 1.1 cgd pat->rarec = pat->pat[r]; 142 1.1 cgd pat->rareoff = r - (pat->patlen - 1); 143 1.1 cgd 144 1.1 cgd /* get md2 shift */ 145 1.1 cgd for (pe = pat->pat + pat->patlen - 1, p = pe - 1; p >= pat->pat; p--) 146 1.1 cgd if (*p == *pe) 147 1.1 cgd break; 148 1.1 cgd 149 1.1 cgd /* *p is first leftward reoccurrence of *pe */ 150 1.1 cgd pat->md2 = pe - p; 151 1.1 cgd return (pat); 152 1.1 cgd 153 1.1 cgd mem: sv_errno = errno; 154 1.1 cgd bm_free(pat); 155 1.1 cgd errno = sv_errno; 156 1.1 cgd return (NULL); 157 1.1 cgd } 158 1.1 cgd 159 1.1 cgd void 160 1.12 abs bm_free(bm_pat *pat) 161 1.1 cgd { 162 1.8 lukem 163 1.8 lukem _DIAGASSERT(pat != NULL); 164 1.8 lukem 165 1.13 shm free(pat->pat); 166 1.13 shm free(pat->delta); 167 1.1 cgd free(pat); 168 1.1 cgd } 169 1.1 cgd 170 1.1 cgd u_char * 171 1.12 abs bm_exec(bm_pat *pat, u_char *base, size_t n) 172 1.1 cgd { 173 1.7 perry u_char *e, *ep, *p, *q, *s; 174 1.7 perry size_t *d0, k, md2, n1, ro; 175 1.7 perry int rc; 176 1.8 lukem 177 1.8 lukem _DIAGASSERT(pat != NULL); 178 1.8 lukem _DIAGASSERT(base != NULL); 179 1.1 cgd 180 1.1 cgd if (n == 0) 181 1.1 cgd return (NULL); 182 1.1 cgd 183 1.1 cgd d0 = pat->delta; 184 1.1 cgd n1 = pat->patlen - 1; 185 1.1 cgd md2 = pat->md2; 186 1.1 cgd ro = pat->rareoff; 187 1.1 cgd rc = pat->rarec; 188 1.1 cgd ep = pat->pat + pat->patlen - 1; 189 1.1 cgd s = base + (pat->patlen - 1); 190 1.1 cgd 191 1.1 cgd /* fast loop up to n - 3 * patlen */ 192 1.1 cgd e = base + n - 3 * pat->patlen; 193 1.1 cgd while (s < e) { 194 1.1 cgd k = d0[*s]; /* ufast skip loop */ 195 1.13 shm while (k && s < e) { 196 1.1 cgd k = d0[*(s += k)]; 197 1.1 cgd k = d0[*(s += k)]; 198 1.1 cgd } 199 1.1 cgd if (s >= e) 200 1.1 cgd break; 201 1.1 cgd if (s[ro] != rc) /* guard test */ 202 1.1 cgd goto mismatch1; 203 1.1 cgd /* fwd match */ 204 1.1 cgd for (p = pat->pat, q = s - n1; p < ep;) 205 1.1 cgd if (*q++ != *p++) 206 1.1 cgd goto mismatch1; 207 1.1 cgd return (s - n1); 208 1.1 cgd 209 1.1 cgd mismatch1: s += md2; /* md2 shift */ 210 1.1 cgd } 211 1.1 cgd 212 1.1 cgd /* slow loop up to end */ 213 1.1 cgd e = base + n; 214 1.1 cgd while (s < e) { 215 1.1 cgd s += d0[*s]; /* step */ 216 1.1 cgd if (s >= e) 217 1.1 cgd break; 218 1.1 cgd if (s[ro] != rc) /* guard test */ 219 1.1 cgd goto mismatch2; 220 1.1 cgd /* fwd match */ 221 1.3 cgd for (p = pat->pat, q = s - n1; p <= ep;) 222 1.1 cgd if (*q++ != *p++) 223 1.1 cgd goto mismatch2; 224 1.1 cgd return (s - n1); 225 1.1 cgd 226 1.1 cgd mismatch2: s += md2; /* md2 shift */ 227 1.1 cgd } 228 1.1 cgd 229 1.1 cgd return (NULL); 230 1.1 cgd } 231