Home | History | Annotate | Line # | Download | only in string
wcscspn_bloom.h revision 1.3
      1  1.3  dholland /*	$NetBSD: wcscspn_bloom.h,v 1.3 2011/11/25 16:46:56 dholland Exp $	*/
      2  1.1     joerg 
      3  1.1     joerg /*-
      4  1.1     joerg  * Copyright (c) 2011 Joerg Sonnenberger,
      5  1.1     joerg  * All rights reserved.
      6  1.1     joerg  *
      7  1.1     joerg  * Redistribution and use in source and binary forms, with or without
      8  1.1     joerg  * modification, are permitted provided that the following conditions
      9  1.1     joerg  * are met:
     10  1.1     joerg  * 1. Redistributions of source code must retain the above copyright
     11  1.1     joerg  *    notice, this list of conditions and the following disclaimer.
     12  1.1     joerg  * 2. Redistributions in binary form must reproduce the above copyright
     13  1.1     joerg  *    notice, this list of conditions and the following disclaimer in the
     14  1.1     joerg  *    documentation and/or other materials provided with the distribution.
     15  1.1     joerg  *
     16  1.1     joerg  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
     17  1.1     joerg  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     18  1.1     joerg  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     19  1.1     joerg  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     20  1.1     joerg  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     21  1.1     joerg  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     22  1.1     joerg  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     23  1.1     joerg  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     24  1.1     joerg  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     25  1.1     joerg  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     26  1.1     joerg  * SUCH DAMAGE.
     27  1.1     joerg  */
     28  1.1     joerg 
     29  1.1     joerg /*
     30  1.1     joerg  * Bloom filter for fast test if a given character is part of the reject set.
     31  1.1     joerg  * The test may have false positives, but doesn't have false negatives.
     32  1.1     joerg  * The first hash function is designed to be very fast to evaluate.
     33  1.1     joerg  * It is collision free if the input is part of the same European language
     34  1.1     joerg  * and shouldn't be too bad even other input.  The second hash function
     35  1.1     joerg  * tries to provide a much better mixing, but involves the slower
     36  1.1     joerg  * multiplication.
     37  1.1     joerg  */
     38  1.1     joerg 
     39  1.3  dholland #include <limits.h>
     40  1.3  dholland 
     41  1.1     joerg #define	BLOOM_SIZE		64
     42  1.1     joerg #define	BLOOM_ARRAY_SIZE	(BLOOM_SIZE / sizeof(size_t))
     43  1.3  dholland #define	BLOOM_BITS		(BLOOM_SIZE * CHAR_BIT)
     44  1.3  dholland #define	BLOOM_DIV		(sizeof(size_t) * CHAR_BIT)
     45  1.1     joerg 
     46  1.1     joerg static inline size_t
     47  1.1     joerg wcscspn_bloom1(size_t x)
     48  1.1     joerg {
     49  1.3  dholland 	return x % BLOOM_BITS;
     50  1.1     joerg }
     51  1.1     joerg 
     52  1.1     joerg static inline size_t
     53  1.1     joerg wcscspn_bloom2(size_t x)
     54  1.1     joerg {
     55  1.2      tron 	return (size_t)((uint32_t)(x * 2654435761U) /
     56  1.3  dholland 	    (0x100000000ULL / BLOOM_BITS));
     57  1.1     joerg }
     58  1.1     joerg 
     59  1.1     joerg static inline void
     60  1.1     joerg wcsspn_bloom_init(size_t *bloom, const wchar_t *charset)
     61  1.1     joerg {
     62  1.1     joerg 	size_t val;
     63  1.1     joerg 
     64  1.1     joerg 	memset(bloom, 0, BLOOM_SIZE);
     65  1.1     joerg 	do {
     66  1.1     joerg 		val = wcscspn_bloom1((size_t)*charset);
     67  1.2      tron 		bloom[val / BLOOM_DIV] |= (size_t)(1ULL << (val % BLOOM_DIV));
     68  1.1     joerg 		val = wcscspn_bloom2((size_t)*charset);
     69  1.2      tron 		bloom[val / BLOOM_DIV] |= (size_t)(1ULL << (val % BLOOM_DIV));
     70  1.1     joerg 	}
     71  1.1     joerg 	while (*++charset);
     72  1.1     joerg }
     73  1.1     joerg 
     74  1.1     joerg static inline int
     75  1.1     joerg wcsspn_in_bloom(const size_t *bloom, wchar_t ch)
     76  1.1     joerg {
     77  1.1     joerg 	size_t val;
     78  1.1     joerg 
     79  1.1     joerg 	val = wcscspn_bloom1((size_t)ch);
     80  1.1     joerg 	if (bloom[val / BLOOM_DIV] & (1ULL << (val % BLOOM_DIV)))
     81  1.1     joerg 		return 1;
     82  1.1     joerg 	val = wcscspn_bloom2((size_t)ch);
     83  1.1     joerg 	if (bloom[val / BLOOM_DIV] & (1ULL << (val % BLOOM_DIV)))
     84  1.1     joerg 		return 1;
     85  1.1     joerg 	return 0;
     86  1.1     joerg }
     87