Home | History | Annotate | Line # | Download | only in dist
cvt.c revision 1.1
      1 /*	$NetBSD	*/
      2 
      3 /*
      4  * Copyright (C) 1984-2011  Mark Nudelman
      5  *
      6  * You may distribute under the terms of either the GNU General Public
      7  * License or the Less License, as specified in the README file.
      8  *
      9  * For more information about less, or for information on how to
     10  * contact the author, see the README file.
     11  */
     12 
     13 /*
     14  * Routines to convert text in various ways.  Used by search.
     15  */
     16 
     17 #include "less.h"
     18 #include "charset.h"
     19 
     20 extern int utf_mode;
     21 
     22 /*
     23  * Get the length of a buffer needed to convert a string.
     24  */
     25 	public int
     26 cvt_length(len, ops)
     27 	int len;
     28 	int ops;
     29 {
     30 	if (utf_mode)
     31 		/*
     32 		 * Just copying a string in UTF-8 mode can cause it to grow
     33 		 * in length.
     34 		 * Four output bytes for one input byte is the worst case.
     35 		 */
     36 		len *= 4;
     37 	return (len + 1);
     38 }
     39 
     40 /*
     41  * Allocate a chpos array for use by cvt_text.
     42  */
     43 	public int *
     44 cvt_alloc_chpos(len)
     45 	int len;
     46 {
     47 	int i;
     48 	int *chpos = (int *) ecalloc(sizeof(int), len);
     49 	/* Initialize all entries to an invalid position. */
     50 	for (i = 0;  i < len;  i++)
     51 		chpos[i] = -1;
     52 	return (chpos);
     53 }
     54 
     55 /*
     56  * Convert text.  Perform the transformations specified by ops.
     57  * Returns converted text in odst.  The original offset of each
     58  * odst character (when it was in osrc) is returned in the chpos array.
     59  */
     60 	public void
     61 cvt_text(odst, osrc, chpos, lenp, ops)
     62 	char *odst;
     63 	char *osrc;
     64 	int *chpos;
     65 	int *lenp;
     66 	int ops;
     67 {
     68 	char *dst;
     69 	char *src;
     70 	register char *src_end;
     71 	LWCHAR ch;
     72 
     73 	if (lenp != NULL)
     74 		src_end = osrc + *lenp;
     75 	else
     76 		src_end = osrc + strlen(osrc);
     77 
     78 	for (src = osrc, dst = odst;  src < src_end;  )
     79 	{
     80 		int src_pos = src - osrc;
     81 		int dst_pos = dst - odst;
     82 		ch = step_char(&src, +1, src_end);
     83 		if ((ops & CVT_BS) && ch == '\b' && dst > odst)
     84 		{
     85 			/* Delete backspace and preceding char. */
     86 			do {
     87 				dst--;
     88 			} while (dst > odst &&
     89 				!IS_ASCII_OCTET(*dst) && !IS_UTF8_LEAD(*dst));
     90 		} else if ((ops & CVT_ANSI) && IS_CSI_START(ch))
     91 		{
     92 			/* Skip to end of ANSI escape sequence. */
     93 			src++;  /* skip the CSI start char */
     94 			while (src < src_end)
     95 				if (!is_ansi_middle(*src++))
     96 					break;
     97 		} else
     98 		{
     99 			/* Just copy the char to the destination buffer. */
    100 			if ((ops & CVT_TO_LC) && IS_UPPER(ch))
    101 				ch = TO_LOWER(ch);
    102 			put_wchar(&dst, ch);
    103 			/*
    104 			 * Record the original position of the char.
    105 			 * But if we've already recorded a position
    106 			 * for this char (due to a backspace), leave
    107 			 * it alone; if multiple source chars map to
    108 			 * one destination char, we want the position
    109 			 * of the first one.
    110 			 */
    111 			if (chpos != NULL && chpos[dst_pos] < 0)
    112 				chpos[dst_pos] = src_pos;
    113 		}
    114 	}
    115 	if ((ops & CVT_CRLF) && dst > odst && dst[-1] == '\r')
    116 		dst--;
    117 	*dst = '\0';
    118 	if (lenp != NULL)
    119 		*lenp = dst - odst;
    120 	if (chpos != NULL)
    121 		chpos[dst - odst] = src - osrc;
    122 }
    123