Home | History | Annotate | Line # | Download | only in string
strspn.c revision 1.1.2.2
      1 /*	$NetBSD: strspn.c,v 1.1.2.2 2014/08/10 06:47:06 tls Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2008 Joerg Sonnenberger
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __RCSID("$NetBSD: strspn.c,v 1.1.2.2 2014/08/10 06:47:06 tls Exp $");
     30 
     31 #if !defined(_KERNEL) && !defined(_STANDALONE)
     32 #include <assert.h>
     33 #include <inttypes.h>
     34 #include <limits.h>
     35 #include <string.h>
     36 #else
     37 #include <lib/libkern/libkern.h>
     38 #endif
     39 
     40 #if ULONG_MAX != 0xffffffffffffffffull
     41 
/*
 * strspn --
 *	Return the number of leading bytes of s that all occur in charset.
 *
 *	Variant for targets where unsigned long is not 64 bits wide.
 *	Membership is recorded in a 256-bit map stored as 32 bytes:
 *	byte value c owns bit (c & 7) of set[c >> 3].
 */
size_t
strspn(const char *s, const char *charset)
{
	static const uint8_t idx[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
	uint8_t set[32];
	const unsigned char *member;
	const unsigned char *scan;
	const char *t;

	_DIAGASSERT(s != NULL);
	_DIAGASSERT(charset != NULL);

	/* An empty set accepts nothing. */
	if (charset[0] == '\0')
		return 0;

	/* One-byte set: a direct comparison avoids building the map. */
	if (charset[1] == '\0') {
		t = s;
		while (*t != '\0' && *t == *charset)
			++t;
		return (size_t)(t - s);
	}

	(void)memset(set, 0, sizeof(set));

	/*
	 * Bytes are read through unsigned char so values >= 0x80 index
	 * the map correctly whatever the signedness of plain char.
	 */
	for (member = (const unsigned char *)charset; *member != '\0';
	    ++member)
		set[*member >> 3] |= idx[*member & 7];

	/* Stop at the first byte whose bit is clear, or at the NUL. */
	for (scan = (const unsigned char *)s; *scan != '\0'; ++scan) {
		if ((set[*scan >> 3] & idx[*scan & 7]) == 0)
			break;
	}
	return (size_t)(scan - (const unsigned char *)s);
}
     73 
     74 #else
     75 
/* 64-bit system: use four 64-bit registers for the bitmask. */
     77 
/*
 * strspn_x --
 *	Shared scanner behind strspn() and strcspn() on 64-bit targets.
 *
 *	Builds a 256-bit membership map of the bytes in charset_s, held
 *	in four unsigned long words covering 64 byte values each, XORs
 *	every word with invert, and returns the number of leading bytes
 *	of s_s whose bit is set in the resulting map.
 *
 *	invert == 0 leaves the map as built; invert == ~0ul complements
 *	it, so the scan then accepts exactly the bytes not in charset_s.
 */
static size_t
strspn_x(const char *s_s, const char *charset_s, unsigned long invert)
{
	const unsigned char *in = (const unsigned char *)s_s;
	const unsigned char *members = (const unsigned char *)charset_s;
	unsigned long map_00, map_40, map_80, map_c0;
	unsigned long word, mask;
	unsigned char cur, ahead;
	size_t span;

	/* One bit per byte value: 0x00-0x3f, 0x40-0x7f, 0x80-0xbf, 0xc0-0xff. */
	map_00 = map_40 = map_80 = map_c0 = 0;

	/* Record every byte of the set, fetching one byte ahead. */
	cur = *members++;
	while (cur != 0) {
		ahead = *members++;
		mask = 1ul << (cur & 0x3f);
		if (__predict_true(cur < 0x80)) {
			if (cur < 0x40)
				map_00 |= mask;
			else
				map_40 |= mask;
		} else {
			if (cur < 0xc0)
				map_80 |= mask;
			else
				map_c0 |= mask;
		}
		cur = ahead;
	}

	/* strcspn() is served by complementing the set of accepted bytes. */
	map_00 ^= invert;
	map_40 ^= invert;
	map_80 ^= invert;
	map_c0 ^= invert;

	/*
	 * Clearing the lowest bit of map_00 would let the map lookup
	 * itself stop the scan at the terminating NUL.  However, fetching
	 * the next byte early is beneficial, and the byte after the NUL
	 * must never be read because it might fault.  So the explicit
	 * comparison against 0 is kept and its cost accepted.
	 */
	span = 0;
	cur = *in++;
	while (cur != 0) {
		/* cur is not the NUL, so the following byte is readable. */
		ahead = *in++;
		if (__predict_true(cur < 0x80))
			word = (cur < 0x40) ? map_00 : map_40;
		else
			word = (cur < 0xc0) ? map_80 : map_c0;
		if (((word >> (cur & 0x3f)) & 1) == 0)
			break;
		span++;
		cur = ahead;
	}
	return span;
}
    143 
/*
 * strspn --
 *	Return the number of leading bytes of s that all occur in charset.
 *	Delegates to strspn_x() with the membership map left as built.
 */
size_t
strspn(const char *s, const char *charset)
{
	const unsigned long keep_map = 0;	/* XOR with 0: map unchanged */

	return strspn_x(s, charset, keep_map);
}
    149 
/*
 * strcspn --
 *	Return the number of leading bytes of s that do not occur in
 *	charset.  Delegates to strspn_x() with every map bit flipped, so
 *	the accepted set is the complement of charset.
 */
size_t
strcspn(const char *s, const char *charset)
{
	const unsigned long flip_map = ~0ul;	/* XOR with all-ones */

	return strspn_x(s, charset, flip_map);
}
    155 #endif
    156