Home | History | Annotate | Line # | Download | only in libedit
chartype.c revision 1.3
      1 /*	$NetBSD: chartype.c,v 1.3 2010/01/12 19:37:18 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. All advertising materials mentioning features or use of this software
     16  *    must display the following acknowledgement:
     17  *        This product includes software developed by the NetBSD
     18  *        Foundation, Inc. and its contributors.
     19  * 4. Neither the name of The NetBSD Foundation nor the names of its
     20  *    contributors may be used to endorse or promote products derived
     21  *    from this software without specific prior written permission.
     22  *
     23  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     25  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     27  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     30  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     31  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     32  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     33  * POSSIBILITY OF SUCH DAMAGE.
     34  */
     35 
     36 /*
     37  * chartype.c: character classification and meta information
     38  */
     39 #include "config.h"
     40 #if !defined(lint) && !defined(SCCSID)
     41 __RCSID("$NetBSD: chartype.c,v 1.3 2010/01/12 19:37:18 christos Exp $");
     42 #endif /* not lint && not SCCSID */
     43 #include "el.h"
     44 #include <stdlib.h>
     45 
     46 #define CT_BUFSIZ 1024
     47 
     48 
     49 #ifdef WIDECHAR
     50 protected void
     51 ct_conv_buff_resize(ct_buffer_t *conv, size_t mincsize, size_t minwsize)
     52 {
     53 	void *p;
     54 	if (mincsize > conv->csize) {
     55 		conv->csize = mincsize;
     56 		p = el_realloc(conv->cbuff, conv->csize);
     57 		if (p == NULL) {
     58 			conv->csize = 0;
     59 			el_free(conv->cbuff);
     60 			conv->cbuff = NULL;
     61 		} else
     62 			conv->cbuff = p;
     63 	}
     64 
     65 	if (minwsize > conv->wsize) {
     66 		conv->wsize = minwsize;
     67 		p = el_realloc(conv->wbuff, conv->wsize);
     68 		if (p == NULL) {
     69 			conv->wsize = 0;
     70 			el_free(conv->wbuff);
     71 			conv->wbuff = NULL;
     72 		} else
     73 			conv->wbuff = p;
     74 	}
     75 }
     76 
     77 
     78 public char *
     79 ct_encode_string(const Char *s, ct_buffer_t *conv)
     80 {
     81 	char *dst;
     82 	ssize_t used = 0;
     83 
     84 	if (!s)
     85 		return NULL;
     86 	if (!conv->cbuff)
     87 		ct_conv_buff_resize(conv, CT_BUFSIZ, 0);
     88 	if (!conv->cbuff)
     89 		return NULL;
     90 
     91 	dst = conv->cbuff;
     92 	while (*s) {
     93 		used = ct_encode_char(dst, (int)(conv->csize -
     94 		    (dst - conv->cbuff)), *s);
     95 		if (used == -1) { /* failed to encode, need more buffer space */
     96 			used = dst - conv->cbuff;
     97 			ct_conv_buff_resize(conv, conv->csize + CT_BUFSIZ, 0);
     98 			if (!conv->cbuff)
     99 				return NULL;
    100 			dst = conv->cbuff + used;
    101 			/* don't increment s here - we want to retry it! */
    102 		}
    103 		else
    104 			++s;
    105 		dst += used;
    106 	}
    107 	if (dst >= (conv->cbuff + conv->csize)) {
    108 		used = dst - conv->cbuff;
    109 		ct_conv_buff_resize(conv, conv->csize + 1, 0);
    110 		if (!conv->cbuff)
    111 			return NULL;
    112 		dst = conv->cbuff + used;
    113 	}
    114 	*dst = '\0';
    115 	return conv->cbuff;
    116 }
    117 
    118 public Char *
    119 ct_decode_string(const char *s, ct_buffer_t *conv)
    120 {
    121 	size_t len = 0;
    122 
    123 	if (!s)
    124 		return NULL;
    125 	if (!conv->wbuff)
    126 		ct_conv_buff_resize(conv, 0, CT_BUFSIZ);
    127 	if (!conv->wbuff)
    128 		return NULL;
    129 
    130 	len = ct_mbstowcs(0, s, 0);
    131 	if (len > conv->wsize)
    132 		ct_conv_buff_resize(conv, 0, len + 1);
    133 	if (!conv->wbuff)
    134 		return NULL;
    135 	ct_mbstowcs(conv->wbuff, s, conv->wsize);
    136 	return conv->wbuff;
    137 }
    138 
    139 
    140 protected Char **
    141 ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
    142 {
    143 	size_t bufspace;
    144 	int i;
    145 	Char *p;
    146 	Char **wargv;
    147 	ssize_t bytes;
    148 
    149 	/* Make sure we have enough space in the conversion buffer to store all
    150 	 * the argv strings. */
    151 	for (i = 0, bufspace = 0; i < argc; ++i)
    152 		bufspace += argv[i] ? strlen(argv[i]) + 1 : 0;
    153 	ct_conv_buff_resize(conv, 0, bufspace);
    154 	if (!conv->wsize)
    155 		return NULL;
    156 
    157 	wargv = el_malloc(argc * sizeof(*wargv));
    158 
    159 	for (i = 0, p = conv->wbuff; i < argc; ++i) {
    160 		if (!argv[i]) {   /* don't pass null pointers to mbstowcs */
    161 			wargv[i] = NULL;
    162 			continue;
    163 		} else {
    164 			wargv[i] = p;
    165 			bytes = mbstowcs(p, argv[i], bufspace);
    166 		}
    167 		if (bytes == -1) {
    168 			el_free(wargv);
    169 			return NULL;
    170 		} else
    171 			bytes++;  /* include '\0' in the count */
    172 		bufspace -= bytes;
    173 		p += bytes;
    174 	}
    175 
    176 	return wargv;
    177 }
    178 
    179 
    180 private size_t
    181 enc_width(Char c)
    182 {
    183 	/* UTF-8 encoding specific values */
    184 	if (c < 0x80)
    185 		return 1;
    186 	else if (c < 0x0800)
    187 		return 2;
    188 	else if (c < 0x10000)
    189 		return 3;
    190 	else if (c < 0x110000)
    191 		return 4;
    192 	else
    193 		return 0; /* not a valid codepoint */
    194 }
    195 
    196 protected ssize_t
    197 ct_encode_char(char *dst, size_t len, Char c)
    198 {
    199 	ssize_t l = 0;
    200 	if (len < enc_width(c))
    201 		return -1;
    202 	l = ct_wctomb(dst, c);
    203 
    204 	if (l < 0) {
    205 		ct_wctomb_reset;
    206 		l = 0;
    207 	}
    208 	return l;
    209 }
    210 #endif
    211 
    212 protected const Char *
    213 ct_visual_string(const Char *s)
    214 {
    215 	static Char *buff = NULL;
    216 	static size_t buffsize = 0;
    217 	void *p;
    218 	Char *dst;
    219 	ssize_t used = 0;
    220 
    221 	if (!s)
    222 		return NULL;
    223 	if (!buff) {
    224 	    buffsize = CT_BUFSIZ;
    225 	    buff = el_malloc(buffsize * sizeof(*buff));
    226 	}
    227 	dst = buff;
    228 	while (*s) {
    229 		used = ct_visual_char(dst, buffsize - (dst - buff), *s);
    230 		if (used == -1) { /* failed to encode, need more buffer space */
    231 			used = dst - buff;
    232 			buffsize += CT_BUFSIZ;
    233 			p = el_realloc(buff, buffsize * sizeof(*buff));
    234 			if (p == NULL)
    235 				goto out;
    236 			buff = p;
    237 			dst = buff + used;
    238 			/* don't increment s here - we want to retry it! */
    239 		}
    240 		else
    241 		    ++s;
    242 		dst += used;
    243 	}
    244 	if (dst >= (buff + buffsize)) { /* sigh */
    245 		buffsize += 1;
    246 		p = el_realloc(buff, buffsize * sizeof(*buff));
    247 		if (p == NULL)
    248 			goto out;
    249 		buff = p;
    250 		dst = buff + buffsize - 1;
    251 	}
    252 	*dst = 0;
    253 	return buff;
    254 out:
    255 	el_free(buff);
    256 	buffsize = 0;
    257 	return NULL;
    258 }
    259 
    260 
    261 
    262 protected int
    263 ct_visual_width(Char c)
    264 {
    265 	int t = ct_chr_class(c);
    266 	switch (t) {
    267 	case CHTYPE_ASCIICTL:
    268 		return 2; /* ^@ ^? etc. */
    269 	case CHTYPE_TAB:
    270 		return 1; /* Hmm, this really need to be handled outside! */
    271 	case CHTYPE_NL:
    272 		return 0; /* Should this be 1 instead? */
    273 #ifdef WIDECHAR
    274 	case CHTYPE_PRINT:
    275 		return wcwidth(c);
    276 	case CHTYPE_NONPRINT:
    277 		if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
    278 			return 8; /* \U+12345 */
    279 		else
    280 			return 7; /* \U+1234 */
    281 #else
    282 	case CHTYPE_PRINT:
    283 		return 1;
    284 	case CHTYPE_NONPRINT:
    285 		return 4; /* \123 */
    286 #endif
    287 	default:
    288 		return 0; /* should not happen */
    289 	}
    290 }
    291 
    292 
    293 protected ssize_t
    294 ct_visual_char(Char *dst, size_t len, Char c)
    295 {
    296 	int t = ct_chr_class(c);
    297 	switch (t) {
    298 	case CHTYPE_TAB:
    299 	case CHTYPE_NL:
    300 	case CHTYPE_ASCIICTL:
    301 		if (len < 2)
    302 			return -1;   /* insufficient space */
    303 		*dst++ = '^';
    304 		if (c == '\177')
    305 			*dst = '?'; /* DEL -> ^? */
    306 		else
    307 			*dst = c | 0100;    /* uncontrolify it */
    308 		return 2;
    309 	case CHTYPE_PRINT:
    310 		if (len < 1)
    311 			return -1;  /* insufficient space */
    312 		*dst = c;
    313 		return 1;
    314 	case CHTYPE_NONPRINT:
    315 		/* we only use single-width glyphs for display,
    316 		 * so this is right */
    317 		if ((ssize_t)len < ct_visual_width(c))
    318 			return -1;   /* insufficient space */
    319 #ifdef WIDECHAR
    320 		*dst++ = '\\';
    321 		*dst++ = 'U';
    322 		*dst++ = '+';
    323 #define tohexdigit(v) "0123456789ABCDEF"[v]
    324 		if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
    325 			*dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf);
    326 		*dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf);
    327 		*dst++ = tohexdigit(((unsigned int) c >>  8) & 0xf);
    328 		*dst++ = tohexdigit(((unsigned int) c >>  4) & 0xf);
    329 		*dst   = tohexdigit(((unsigned int) c      ) & 0xf);
    330 		return (c > 0xffff) ? 8 : 7;
    331 #else
    332 		*dst++ = '\\';
    333 #define tooctaldigit(v) ((v) + '0')
    334 		*dst++ = tooctaldigit(((unsigned int) c >> 6) & 0x7);
    335 		*dst++ = tooctaldigit(((unsigned int) c >> 3) & 0x7);
    336 		*dst++ = tooctaldigit(((unsigned int) c     ) & 0x7);
    337 #endif
    338 		/*FALLTHROUGH*/
    339 	/* these two should be handled outside this function */
    340 	default:            /* we should never hit the default */
    341 		return 0;
    342 	}
    343 }
    344 
    345 
    346 
    347 
    348 protected int
    349 ct_chr_class(Char c)
    350 {
    351 	if (c == '\t')
    352 		return CHTYPE_TAB;
    353 	else if (c == '\n')
    354 		return CHTYPE_NL;
    355 	else if (IsASCII(c) && Iscntrl(c))
    356 		return CHTYPE_ASCIICTL;
    357 	else if (Isprint(c))
    358 		return CHTYPE_PRINT;
    359 	else
    360 		return CHTYPE_NONPRINT;
    361 }
    362