Home | History | Annotate | Line # | Download | only in gnulib-lib
      1 /* Determine the number of screen columns needed for a string.
      2    Copyright (C) 2000-2006 Free Software Foundation, Inc.
      3 
      4    This program is free software; you can redistribute it and/or modify
      5    it under the terms of the GNU General Public License as published by
      6    the Free Software Foundation; either version 2, or (at your option)
      7    any later version.
      8 
      9    This program is distributed in the hope that it will be useful,
     10    but WITHOUT ANY WARRANTY; without even the implied warranty of
     11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12    GNU General Public License for more details.
     13 
     14    You should have received a copy of the GNU General Public License
     15    along with this program; if not, write to the Free Software Foundation,
     16    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
     17 
     18 /* Written by Bruno Haible <haible (at) clisp.cons.org>.  */
     19 
#include <config.h>

/* Specification.  */
#include "mbswidth.h"

/* Get INT_MAX.  */
#include <limits.h>

/* Get MB_CUR_MAX.  */
#include <stdlib.h>

#include <string.h>

/* Get isprint().  */
#include <ctype.h>
     32 
     33 /* Get mbstate_t, mbrtowc(), mbsinit().  */
     34 #if HAVE_WCHAR_H
     35 /* Tru64 with Desktop Toolkit C has a bug: <stdio.h> must be included before
     36    <wchar.h>.
     37    BSD/OS 4.1 has a bug: <stdio.h> and <time.h> must be included before
     38    <wchar.h>.  */
     39 # include <stdio.h>
     40 # include <time.h>
     41 # include <wchar.h>
     42 #endif
     43 
     44 /* Get wcwidth().  */
     45 #include "wcwidth.h"
     46 
     47 /* Get iswcntrl().  */
     48 #if HAVE_WCTYPE_H
     49 # include <wctype.h>
     50 #endif
     51 #if !defined iswcntrl && !HAVE_ISWCNTRL
     52 # define iswcntrl(wc) (((wc) & ~0x1f) == 0 || (wc) == 0x7f)
     53 #endif
     54 
     55 #ifndef mbsinit
     56 # if !HAVE_MBSINIT
     57 #  define mbsinit(ps) 1
     58 # endif
     59 #endif
     60 
/* Screen-column count of the NUL-terminated multibyte string STRING.

   The string is measured with strlen and handed, together with FLAGS,
   to mbsnwidth; the value returned is whatever mbsnwidth returns for
   those arguments.  In particular, -1 is returned when a non-printable
   character occurs and MBSW_REJECT_UNPRINTABLE is set in FLAGS.
   With FLAGS = MBSW_REJECT_INVALID | MBSW_REJECT_UNPRINTABLE, this is
   the multibyte analogue of the wcswidth function.
   Any limit on the widths that can be represented is the one of
   mbsnwidth.  */
int
mbswidth (const char *string, int flags)
{
  size_t nbytes = strlen (string);

  return mbsnwidth (string, nbytes, flags);
}
     72 
     73 /* Returns the number of columns needed to represent the multibyte
     74    character string pointed to by STRING of length NBYTES.  If a
     75    non-printable character occurs, and MBSW_REJECT_UNPRINTABLE is
     76    specified, -1 is returned.
     77    If NBYTES is not < INT_MAX / 2, integer overflow can occur.  */
     78 int
     79 mbsnwidth (const char *string, size_t nbytes, int flags)
     80 {
     81   const char *p = string;
     82   const char *plimit = p + nbytes;
     83   int width;
     84 
     85   width = 0;
     86 #if HAVE_MBRTOWC
     87   if (MB_CUR_MAX > 1)
     88     {
     89       while (p < plimit)
     90 	switch (*p)
     91 	  {
     92 	    case ' ': case '!': case '"': case '#': case '%':
     93 	    case '&': case '\'': case '(': case ')': case '*':
     94 	    case '+': case ',': case '-': case '.': case '/':
     95 	    case '0': case '1': case '2': case '3': case '4':
     96 	    case '5': case '6': case '7': case '8': case '9':
     97 	    case ':': case ';': case '<': case '=': case '>':
     98 	    case '?':
     99 	    case 'A': case 'B': case 'C': case 'D': case 'E':
    100 	    case 'F': case 'G': case 'H': case 'I': case 'J':
    101 	    case 'K': case 'L': case 'M': case 'N': case 'O':
    102 	    case 'P': case 'Q': case 'R': case 'S': case 'T':
    103 	    case 'U': case 'V': case 'W': case 'X': case 'Y':
    104 	    case 'Z':
    105 	    case '[': case '\\': case ']': case '^': case '_':
    106 	    case 'a': case 'b': case 'c': case 'd': case 'e':
    107 	    case 'f': case 'g': case 'h': case 'i': case 'j':
    108 	    case 'k': case 'l': case 'm': case 'n': case 'o':
    109 	    case 'p': case 'q': case 'r': case 's': case 't':
    110 	    case 'u': case 'v': case 'w': case 'x': case 'y':
    111 	    case 'z': case '{': case '|': case '}': case '~':
    112 	      /* These characters are printable ASCII characters.  */
    113 	      p++;
    114 	      width++;
    115 	      break;
    116 	    default:
    117 	      /* If we have a multibyte sequence, scan it up to its end.  */
    118 	      {
    119 		mbstate_t mbstate;
    120 		memset (&mbstate, 0, sizeof mbstate);
    121 		do
    122 		  {
    123 		    wchar_t wc;
    124 		    size_t bytes;
    125 		    int w;
    126 
    127 		    bytes = mbrtowc (&wc, p, plimit - p, &mbstate);
    128 
    129 		    if (bytes == (size_t) -1)
    130 		      /* An invalid multibyte sequence was encountered.  */
    131 		      {
    132 			if (!(flags & MBSW_REJECT_INVALID))
    133 			  {
    134 			    p++;
    135 			    width++;
    136 			    break;
    137 			  }
    138 			else
    139 			  return -1;
    140 		      }
    141 
    142 		    if (bytes == (size_t) -2)
    143 		      /* An incomplete multibyte character at the end.  */
    144 		      {
    145 			if (!(flags & MBSW_REJECT_INVALID))
    146 			  {
    147 			    p = plimit;
    148 			    width++;
    149 			    break;
    150 			  }
    151 			else
    152 			  return -1;
    153 		      }
    154 
    155 		    if (bytes == 0)
    156 		      /* A null wide character was encountered.  */
    157 		      bytes = 1;
    158 
    159 		    w = wcwidth (wc);
    160 		    if (w >= 0)
    161 		      /* A printable multibyte character.  */
    162 		      width += w;
    163 		    else
    164 		      /* An unprintable multibyte character.  */
    165 		      if (!(flags & MBSW_REJECT_UNPRINTABLE))
    166 			width += (iswcntrl (wc) ? 0 : 1);
    167 		      else
    168 			return -1;
    169 
    170 		    p += bytes;
    171 		  }
    172 		while (! mbsinit (&mbstate));
    173 	      }
    174 	      break;
    175 	  }
    176       return width;
    177     }
    178 #endif
    179 
    180   while (p < plimit)
    181     {
    182       unsigned char c = (unsigned char) *p++;
    183 
    184       if (isprint (c))
    185 	width++;
    186       else if (!(flags & MBSW_REJECT_UNPRINTABLE))
    187 	width += (iscntrl (c) ? 0 : 1);
    188       else
    189 	return -1;
    190     }
    191   return width;
    192 }
    193