Home | History | Annotate | Line # | Download | only in libedit
chartype.c revision 1.25
      1 /*	$NetBSD: chartype.c,v 1.25 2016/04/09 18:43:17 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2009 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 /*
     30  * chartype.c: character classification and meta information
     31  */
     32 #include "config.h"
     33 #if !defined(lint) && !defined(SCCSID)
     34 __RCSID("$NetBSD: chartype.c,v 1.25 2016/04/09 18:43:17 christos Exp $");
     35 #endif /* not lint && not SCCSID */
     36 
     37 #include <ctype.h>
     38 #include <stdlib.h>
     39 #include <string.h>
     40 
     41 #include "el.h"
     42 
/*
 * Increment, counted in buffer elements, by which the conversion and
 * visual-string buffers in this file are grown when they run out of room.
 */
#define CT_BUFSIZ ((size_t)1024)
     44 
     45 protected int
     46 ct_conv_cbuff_resize(ct_buffer_t *conv, size_t csize)
     47 {
     48 	void *p;
     49 
     50 	if (csize <= conv->csize)
     51 		return 0;
     52 
     53 	conv->csize = csize;
     54 
     55 	p = el_realloc(conv->cbuff, conv->csize * sizeof(*conv->cbuff));
     56 	if (p == NULL) {
     57 		conv->csize = 0;
     58 		el_free(conv->cbuff);
     59 		conv->cbuff = NULL;
     60 		return -1;
     61 	}
     62 	conv->cbuff = p;
     63 	return 0;
     64 }
     65 
     66 protected int
     67 ct_conv_wbuff_resize(ct_buffer_t *conv, size_t wsize)
     68 {
     69 	void *p;
     70 
     71 	if (wsize <= conv->wsize)
     72 		return 0;
     73 
     74 	conv->wsize = wsize;
     75 
     76 	p = el_realloc(conv->wbuff, conv->wsize * sizeof(*conv->wbuff));
     77 	if (p == NULL) {
     78 		conv->wsize = 0;
     79 		el_free(conv->wbuff);
     80 		conv->wbuff = NULL;
     81 		return -1;
     82 	}
     83 	conv->wbuff = p;
     84 	return 0;
     85 }
     86 
     87 
     88 public char *
     89 ct_encode_string(const Char *s, ct_buffer_t *conv)
     90 {
     91 	char *dst;
     92 	ssize_t used;
     93 
     94 	if (!s)
     95 		return NULL;
     96 
     97 	dst = conv->cbuff;
     98 	for (;;) {
     99 		used = (ssize_t)(dst - conv->cbuff);
    100 		if ((conv->csize - (size_t)used) < 5) {
    101 			if (ct_conv_cbuff_resize(conv,
    102 			    conv->csize + CT_BUFSIZ) == -1)
    103 				return NULL;
    104 			dst = conv->cbuff + used;
    105 		}
    106 		if (!*s)
    107 			break;
    108 		used = ct_encode_char(dst, (size_t)5, *s);
    109 		if (used == -1) /* failed to encode, need more buffer space */
    110 			abort();
    111 		++s;
    112 		dst += used;
    113 	}
    114 	*dst = '\0';
    115 	return conv->cbuff;
    116 }
    117 
    118 public Char *
    119 ct_decode_string(const char *s, ct_buffer_t *conv)
    120 {
    121 	size_t len;
    122 
    123 	if (!s)
    124 		return NULL;
    125 
    126 	len = mbstowcs(NULL, s, (size_t)0);
    127 	if (len == (size_t)-1)
    128 		return NULL;
    129 
    130 	if (conv->wsize < ++len)
    131 		if (ct_conv_wbuff_resize(conv, len + CT_BUFSIZ) == -1)
    132 			return NULL;
    133 
    134 	mbstowcs(conv->wbuff, s, conv->wsize);
    135 	return conv->wbuff;
    136 }
    137 
    138 
    139 protected Char **
    140 ct_decode_argv(int argc, const char *argv[], ct_buffer_t *conv)
    141 {
    142 	size_t bufspace;
    143 	int i;
    144 	Char *p;
    145 	Char **wargv;
    146 	ssize_t bytes;
    147 
    148 	/* Make sure we have enough space in the conversion buffer to store all
    149 	 * the argv strings. */
    150 	for (i = 0, bufspace = 0; i < argc; ++i)
    151 		bufspace += argv[i] ? strlen(argv[i]) + 1 : 0;
    152 	if (conv->wsize < ++bufspace)
    153 		if (ct_conv_wbuff_resize(conv, bufspace + CT_BUFSIZ) == -1)
    154 			return NULL;
    155 
    156 	wargv = el_malloc((size_t)argc * sizeof(*wargv));
    157 
    158 	for (i = 0, p = conv->wbuff; i < argc; ++i) {
    159 		if (!argv[i]) {   /* don't pass null pointers to mbstowcs */
    160 			wargv[i] = NULL;
    161 			continue;
    162 		} else {
    163 			wargv[i] = p;
    164 			bytes = (ssize_t)mbstowcs(p, argv[i], bufspace);
    165 		}
    166 		if (bytes == -1) {
    167 			el_free(wargv);
    168 			return NULL;
    169 		} else
    170 			bytes++;  /* include '\0' in the count */
    171 		bufspace -= (size_t)bytes;
    172 		p += bytes;
    173 	}
    174 
    175 	return wargv;
    176 }
    177 
    178 
    179 protected size_t
    180 ct_enc_width(Char c)
    181 {
    182 	/* UTF-8 encoding specific values */
    183 	if (c < 0x80)
    184 		return 1;
    185 	else if (c < 0x0800)
    186 		return 2;
    187 	else if (c < 0x10000)
    188 		return 3;
    189 	else if (c < 0x110000)
    190 		return 4;
    191 	else
    192 		return 0; /* not a valid codepoint */
    193 }
    194 
    195 protected ssize_t
    196 ct_encode_char(char *dst, size_t len, Char c)
    197 {
    198 	ssize_t l = 0;
    199 	if (len < ct_enc_width(c))
    200 		return -1;
    201 	l = wctomb(dst, c);
    202 
    203 	if (l < 0) {
    204 		wctomb(NULL, L'\0');
    205 		l = 0;
    206 	}
    207 	return l;
    208 }
    209 
    210 protected const Char *
    211 ct_visual_string(const Char *s)
    212 {
    213 	static Char *buff = NULL;
    214 	static size_t buffsize = 0;
    215 	void *p;
    216 	Char *dst;
    217 	ssize_t used = 0;
    218 
    219 	if (!s)
    220 		return NULL;
    221 	if (!buff) {
    222 	    buffsize = CT_BUFSIZ;
    223 	    buff = el_malloc(buffsize * sizeof(*buff));
    224 	}
    225 	dst = buff;
    226 	while (*s) {
    227 		used = ct_visual_char(dst, buffsize - (size_t)(dst - buff), *s);
    228 		if (used == -1) { /* failed to encode, need more buffer space */
    229 			used = dst - buff;
    230 			buffsize += CT_BUFSIZ;
    231 			p = el_realloc(buff, buffsize * sizeof(*buff));
    232 			if (p == NULL)
    233 				goto out;
    234 			buff = p;
    235 			dst = buff + used;
    236 			/* don't increment s here - we want to retry it! */
    237 		}
    238 		else
    239 		    ++s;
    240 		dst += used;
    241 	}
    242 	if (dst >= (buff + buffsize)) { /* sigh */
    243 		buffsize += 1;
    244 		p = el_realloc(buff, buffsize * sizeof(*buff));
    245 		if (p == NULL)
    246 			goto out;
    247 		buff = p;
    248 		dst = buff + buffsize - 1;
    249 	}
    250 	*dst = 0;
    251 	return buff;
    252 out:
    253 	el_free(buff);
    254 	buffsize = 0;
    255 	return NULL;
    256 }
    257 
    258 
    259 
    260 protected int
    261 ct_visual_width(Char c)
    262 {
    263 	int t = ct_chr_class(c);
    264 	switch (t) {
    265 	case CHTYPE_ASCIICTL:
    266 		return 2; /* ^@ ^? etc. */
    267 	case CHTYPE_TAB:
    268 		return 1; /* Hmm, this really need to be handled outside! */
    269 	case CHTYPE_NL:
    270 		return 0; /* Should this be 1 instead? */
    271 	case CHTYPE_PRINT:
    272 		return wcwidth(c);
    273 	case CHTYPE_NONPRINT:
    274 		if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
    275 			return 8; /* \U+12345 */
    276 		else
    277 			return 7; /* \U+1234 */
    278 	default:
    279 		return 0; /* should not happen */
    280 	}
    281 }
    282 
    283 
    284 protected ssize_t
    285 ct_visual_char(Char *dst, size_t len, Char c)
    286 {
    287 	int t = ct_chr_class(c);
    288 	switch (t) {
    289 	case CHTYPE_TAB:
    290 	case CHTYPE_NL:
    291 	case CHTYPE_ASCIICTL:
    292 		if (len < 2)
    293 			return -1;   /* insufficient space */
    294 		*dst++ = '^';
    295 		if (c == '\177')
    296 			*dst = '?'; /* DEL -> ^? */
    297 		else
    298 			*dst = c | 0100;    /* uncontrolify it */
    299 		return 2;
    300 	case CHTYPE_PRINT:
    301 		if (len < 1)
    302 			return -1;  /* insufficient space */
    303 		*dst = c;
    304 		return 1;
    305 	case CHTYPE_NONPRINT:
    306 		/* we only use single-width glyphs for display,
    307 		 * so this is right */
    308 		if ((ssize_t)len < ct_visual_width(c))
    309 			return -1;   /* insufficient space */
    310 		*dst++ = '\\';
    311 		*dst++ = 'U';
    312 		*dst++ = '+';
    313 #define tohexdigit(v) "0123456789ABCDEF"[v]
    314 		if (c > 0xffff) /* prefer standard 4-byte display over 5-byte */
    315 			*dst++ = tohexdigit(((unsigned int) c >> 16) & 0xf);
    316 		*dst++ = tohexdigit(((unsigned int) c >> 12) & 0xf);
    317 		*dst++ = tohexdigit(((unsigned int) c >>  8) & 0xf);
    318 		*dst++ = tohexdigit(((unsigned int) c >>  4) & 0xf);
    319 		*dst   = tohexdigit(((unsigned int) c      ) & 0xf);
    320 		return c > 0xffff ? 8 : 7;
    321 		/*FALLTHROUGH*/
    322 	/* these two should be handled outside this function */
    323 	default:            /* we should never hit the default */
    324 		return 0;
    325 	}
    326 }
    327 
    328 
    329 
    330 
    331 protected int
    332 ct_chr_class(Char c)
    333 {
    334 	if (c == '\t')
    335 		return CHTYPE_TAB;
    336 	else if (c == '\n')
    337 		return CHTYPE_NL;
    338 	else if (c < 0x100 && iswcntrl(c))
    339 		return CHTYPE_ASCIICTL;
    340 	else if (iswprint(c))
    341 		return CHTYPE_PRINT;
    342 	else
    343 		return CHTYPE_NONPRINT;
    344 }
    345