Home | History | Annotate | Line # | Download | only in string
      1 /*	$NetBSD: strspn.c,v 1.3 2023/06/18 22:18:13 rillig Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2008 Joerg Sonnenberger
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include <sys/cdefs.h>
     29 __RCSID("$NetBSD: strspn.c,v 1.3 2023/06/18 22:18:13 rillig Exp $");
     30 
     31 #if !defined(_KERNEL) && !defined(_STANDALONE)
     32 #include <assert.h>
     33 #include <inttypes.h>
     34 #include <limits.h>
     35 #include <string.h>
     36 #else
     37 #include <lib/libkern/libkern.h>
     38 #endif
     39 
     40 #if ULONG_MAX != 0xffffffffffffffffull
     41 
/*
 * strspn --
 *	Return the number of leading bytes of s that all occur in charset.
 *
 *	Membership is tracked in a 256-bit table stored as 32 bytes: byte
 *	value c maps to bit (c & 7) of element (c >> 3).
 */
size_t
strspn(const char *s, const char *charset)
{
	static const uint8_t bitmask[8] = { 1, 2, 4, 8, 16, 32, 64, 128 };
	uint8_t member[32];
	const char *p;
	unsigned int c;

	/* An empty set cannot match anything. */
	if (charset[0] == '\0')
		return 0;

	/* One-byte set: compare directly instead of building the table. */
	if (charset[1] == '\0') {
		p = s;
		while (*p != '\0' && *p == *charset)
			++p;
		return p - s;
	}

	(void)memset(member, 0, sizeof(member));

	/* Set one bit per byte value present in charset. */
	while (*charset != '\0') {
		/* Go through unsigned char so high bytes index 128..255. */
		c = (unsigned int)(unsigned char)*charset++;
		member[c >> 3] |= bitmask[c & 7];
	}

	/* Advance until the terminator or the first byte not in the set. */
	for (p = s; *p != '\0'; ++p) {
		c = (unsigned int)(unsigned char)*p;
		if ((member[c >> 3] & bitmask[c & 7]) == 0)
			break;
	}
	return p - s;
}
     70 
     71 #else
     72 
     73 /* 64-bit system: use four 64-bit registers for the bitmask */
     74 
/*
 * strspn_x --
 *	Common worker for strspn() and strcspn().
 *
 *	Builds a 256-bit membership set from charset_s, held in four
 *	unsigned longs of 64 bits each, XORs every word with invert, and
 *	returns how many leading bytes of s_s have their bit set.
 *
 *	An invert of 0 leaves the set untouched; ~0ul complements it.
 *	The terminating NUL always ends the scan because it is tested
 *	before the set is consulted.
 */
static size_t
strspn_x(const char *s_s, const char *charset_s, unsigned long invert)
{
	const unsigned char *cs = (const unsigned char *)charset_s;
	const unsigned char *start = (const unsigned char *)s_s;
	const unsigned char *p = start;
	/* One word per 64-value range: 0x00-0x3f, 0x40-0x7f, 0x80-0xbf, 0xc0-0xff */
	unsigned long set_00 = 0;
	unsigned long set_40 = 0;
	unsigned long set_80 = 0;
	unsigned long set_c0 = 0;
	unsigned long mask, word;
	unsigned char cur, ahead;

	/* Record each charset byte, loading the following byte early. */
	cur = *cs;
	while (cur != 0) {
		ahead = *++cs;
		mask = 1ul << (cur & 0x3f);
		if (__predict_true(cur < 0x80)) {
			if (cur >= 0x40)
				set_40 |= mask;
			else
				set_00 |= mask;
		} else {
			if (cur >= 0xc0)
				set_c0 |= mask;
			else
				set_80 |= mask;
		}
		cur = ahead;
	}

	/* For strcspn() the validity set is simply complemented. */
	set_00 ^= invert;
	set_40 ^= invert;
	set_80 ^= invert;
	set_c0 ^= invert;

	/*
	 * We could clear the lsb of set_00 so that the NUL byte ends the
	 * scan through the set lookup alone.  However, prefetching the
	 * next byte is beneficial, and the byte after the NUL must never
	 * be read - it might fault!  So we accept the cost of an explicit
	 * compare against 0.
	 */
	cur = *p;
	while (cur != 0) {
		/* cur is not the terminator, so p[1] is still in the string. */
		ahead = p[1];
		if (__predict_true(cur < 0x80))
			word = (cur >= 0x40) ? set_40 : set_00;
		else
			word = (cur >= 0xc0) ? set_c0 : set_80;
		if (((word >> (cur & 0x3f)) & 1) == 0)
			break;
		++p;
		cur = ahead;
	}
	return (size_t)(p - start);
}
    140 
/*
 * strspn --
 *	Count the leading bytes of s that are members of charset.
 *	Delegates to strspn_x() with a zero invert mask, so the set built
 *	from charset is used unmodified.
 */
size_t
strspn(const char *s, const char *charset)
{
	const unsigned long keep_set = 0;

	return strspn_x(s, charset, keep_set);
}
    146 
/*
 * strcspn --
 *	Count the leading bytes of s that are NOT members of charset.
 *	Delegates to strspn_x() with an all-ones invert mask, which
 *	complements the set built from charset.
 */
size_t
strcspn(const char *s, const char *charset)
{
	const unsigned long flip_set = ~0ul;

	return strspn_x(s, charset, flip_set);
}
    152 #endif
    153