Home | History | Annotate | Line # | Download | only in more
      1 /*	$NetBSD: line.c,v 1.5 2003/10/13 14:34:25 agc Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1988 Mark Nudelman
      5  * Copyright (c) 1988, 1993
      6  *	The Regents of the University of California.  All rights reserved.
      7  *
      8  * Redistribution and use in source and binary forms, with or without
      9  * modification, are permitted provided that the following conditions
     10  * are met:
     11  * 1. Redistributions of source code must retain the above copyright
     12  *    notice, this list of conditions and the following disclaimer.
     13  * 2. Redistributions in binary form must reproduce the above copyright
     14  *    notice, this list of conditions and the following disclaimer in the
     15  *    documentation and/or other materials provided with the distribution.
     16  * 3. Neither the name of the University nor the names of its contributors
     17  *    may be used to endorse or promote products derived from this software
     18  *    without specific prior written permission.
     19  *
     20  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     30  * SUCH DAMAGE.
     31  */
     32 
     33 #include <sys/cdefs.h>
     34 #ifndef lint
     35 #if 0
     36 static char sccsid[] = "@(#)line.c	8.1 (Berkeley) 6/6/93";
     37 #else
     38 __RCSID("$NetBSD: line.c,v 1.5 2003/10/13 14:34:25 agc Exp $");
     39 #endif
     40 #endif /* not lint */
     41 
     42 /*
     43  * Routines to manipulate the "line buffer".
     44  * The line buffer holds a line of output as it is being built
     45  * in preparation for output to the screen.
     46  * We keep track of the PRINTABLE length of the line as it is being built.
     47  */
     48 
     49 #include <sys/types.h>
     50 #include <ctype.h>
     51 
     52 #include "less.h"
     53 #include "extern.h"
     54 
static char linebuf[1024];	/* Buffer which holds the current output line */
static char *curr;		/* Next position to be written in linebuf */
static int column;		/* Printable length, accounting for
				   backspaces, etc. */
/*
 * A ridiculously complex state machine takes care of backspaces.  The
 * complexity arises from the attempt to deal with all cases, especially
 * involving long lines with underlining, boldfacing or whatever.  There
 * are still some cases which will break it.
 *
 * There are seven states:
 *	LN_NORMAL is the normal state (not in underline or boldface mode).
 *	LN_UNDERLINE means we are in underline mode.  We expect to get
 *		either a sequence like "_\bX" or "X\b_" to continue
 *		underline mode, or anything else to end underline mode.
 *	LN_UL_X means we are one character after LN_UNDERLINE
 *		(we have gotten the '_' in "_\bX" or the 'X' in "X\b_").
 *	LN_UL_XB means we are one character after LN_UL_X
 *		(we have gotten the backspace in "_\bX" or "X\b_";
 *		we expect one more ordinary character,
 *		which will put us back in state LN_UNDERLINE).
 *	LN_BOLDFACE means we are in boldface mode.  We expect to get sequences
 *		like "X\bX\b...X\bX" to continue boldface mode, or anything
 *		else to end boldface mode.
 *	LN_BO_X means we are one character after LN_BOLDFACE
 *		(we have gotten the 'X' in "X\bX").
 *	LN_BO_XB means we are one character after LN_BO_X
 *		(we have gotten the backspace in "X\bX";
 *		we expect one more 'X' which will put us back
 *		in LN_BOLDFACE).
 *
 * LN_NORMAL must stay 0: the state is also tested for plain truth
 * ("in some special mode?") as well as compared against these names.
 */
static int ln_state;		/* Currently in normal/underline/bold/etc mode? */
enum {
	LN_NORMAL	= 0,	/* Not in underline, boldface or whatever mode */
	LN_UNDERLINE	= 1,	/* In underline, need next char */
	LN_UL_X		= 2,	/* In underline, got char, need \b */
	LN_UL_XB	= 3,	/* In underline, got char & \b, need one more */
	LN_BOLDFACE	= 4,	/* In boldface, need next char */
	LN_BO_X		= 5,	/* In boldface, got char, need \b */
	LN_BO_XB	= 6	/* In boldface, got char & \b, need same char */
};

char *line;			/* Pointer to the current line.
				   Usually points to linebuf. */
     97 /*
     98  * Rewind the line buffer.
     99  */
    100 void
    101 prewind()
    102 {
    103 	line = curr = linebuf;
    104 	ln_state = LN_NORMAL;
    105 	column = 0;
    106 }
    107 
    108 /*
    109  * Append a character to the line buffer.
    110  * Expand tabs into spaces, handle underlining, boldfacing, etc.
    111  * Returns 0 if ok, 1 if couldn't fit in buffer.
    112  */
    113 #define	NEW_COLUMN(addon) \
    114 	if (column + addon + (ln_state ? ue_width : 0) > sc_width) \
    115 		return(1); \
    116 	else \
    117 		column += addon
    118 
    119 int
    120 pappend(c)
    121 	int c;
    122 {
    123 	if (c == '\0') {
    124 		/*
    125 		 * Terminate any special modes, if necessary.
    126 		 * Append a '\0' to the end of the line.
    127 		 */
    128 		switch (ln_state) {
    129 		case LN_UL_X:
    130 			curr[0] = curr[-1];
    131 			curr[-1] = UE_CHAR;
    132 			curr++;
    133 			break;
    134 		case LN_BO_X:
    135 			curr[0] = curr[-1];
    136 			curr[-1] = BE_CHAR;
    137 			curr++;
    138 			break;
    139 		case LN_UL_XB:
    140 		case LN_UNDERLINE:
    141 			*curr++ = UE_CHAR;
    142 			break;
    143 		case LN_BO_XB:
    144 		case LN_BOLDFACE:
    145 			*curr++ = BE_CHAR;
    146 			break;
    147 		}
    148 		ln_state = LN_NORMAL;
    149 		*curr = '\0';
    150 		return(0);
    151 	}
    152 
    153 	if (curr > linebuf + sizeof(linebuf) - 12)
    154 		/*
    155 		 * Almost out of room in the line buffer.
    156 		 * Don't take any chances.
    157 		 * {{ Linebuf is supposed to be big enough that this
    158 		 *    will never happen, but may need to be made
    159 		 *    bigger for wide screens or lots of backspaces. }}
    160 		 */
    161 		return(1);
    162 
    163 	if (!bs_mode) {
    164 		/*
    165 		 * Advance the state machine.
    166 		 */
    167 		switch (ln_state) {
    168 		case LN_NORMAL:
    169 			if (curr <= linebuf + 1
    170 			    || curr[-1] != (char)('H' | 0200))
    171 				break;
    172 			column -= 2;
    173 			if (c == curr[-2])
    174 				goto enter_boldface;
    175 			if (c == '_' || curr[-2] == '_')
    176 				goto enter_underline;
    177 			curr -= 2;
    178 			break;
    179 
    180 enter_boldface:
    181 			/*
    182 			 * We have "X\bX" (including the current char).
    183 			 * Switch into boldface mode.
    184 			 */
    185 			column--;
    186 			if (column + bo_width + be_width + 1 >= sc_width)
    187 				/*
    188 				 * Not enough room left on the screen to
    189 				 * enter and exit boldface mode.
    190 				 */
    191 				return (1);
    192 
    193 			if (bo_width > 0 && curr > linebuf + 2
    194 			    && curr[-3] == ' ') {
    195 				/*
    196 				 * Special case for magic cookie terminals:
    197 				 * if the previous char was a space, replace
    198 				 * it with the "enter boldface" sequence.
    199 				 */
    200 				curr[-3] = BO_CHAR;
    201 				column += bo_width-1;
    202 			} else {
    203 				curr[-1] = curr[-2];
    204 				curr[-2] = BO_CHAR;
    205 				column += bo_width;
    206 				curr++;
    207 			}
    208 			goto ln_bo_xb_case;
    209 
    210 enter_underline:
    211 			/*
    212 			 * We have either "_\bX" or "X\b_" (including
    213 			 * the current char).  Switch into underline mode.
    214 			 */
    215 			column--;
    216 			if (column + ul_width + ue_width + 1 >= sc_width)
    217 				/*
    218 				 * Not enough room left on the screen to
    219 				 * enter and exit underline mode.
    220 				 */
    221 				return (1);
    222 
    223 			if (ul_width > 0 &&
    224 			    curr > linebuf + 2 && curr[-3] == ' ')
    225 			{
    226 				/*
    227 				 * Special case for magic cookie terminals:
    228 				 * if the previous char was a space, replace
    229 				 * it with the "enter underline" sequence.
    230 				 */
    231 				curr[-3] = UL_CHAR;
    232 				column += ul_width-1;
    233 			} else
    234 			{
    235 				curr[-1] = curr[-2];
    236 				curr[-2] = UL_CHAR;
    237 				column += ul_width;
    238 				curr++;
    239 			}
    240 			goto ln_ul_xb_case;
    241 			/*NOTREACHED*/
    242 		case LN_UL_XB:
    243 			/*
    244 			 * Termination of a sequence "_\bX" or "X\b_".
    245 			 */
    246 			if (c != '_' && curr[-2] != '_' && c == curr[-2])
    247 			{
    248 				/*
    249 				 * We seem to have run on from underlining
    250 				 * into boldfacing - this is a nasty fix, but
    251 				 * until this whole routine is rewritten as a
    252 				 * real DFA, ...  well ...
    253 				 */
    254 				curr[0] = curr[-2];
    255 				curr[-2] = UE_CHAR;
    256 				curr[-1] = BO_CHAR;
    257 				curr += 2; /* char & non-existent backspace */
    258 				ln_state = LN_BO_XB;
    259 				goto ln_bo_xb_case;
    260 			}
    261 ln_ul_xb_case:
    262 			if (c == '_')
    263 				c = curr[-2];
    264 			curr -= 2;
    265 			ln_state = LN_UNDERLINE;
    266 			break;
    267 		case LN_BO_XB:
    268 			/*
    269 			 * Termination of a sequnce "X\bX".
    270 			 */
    271 			if (c != curr[-2] && (c == '_' || curr[-2] == '_'))
    272 			{
    273 				/*
    274 				 * We seem to have run on from
    275 				 * boldfacing into underlining.
    276 				 */
    277 				curr[0] = curr[-2];
    278 				curr[-2] = BE_CHAR;
    279 				curr[-1] = UL_CHAR;
    280 				curr += 2; /* char & non-existent backspace */
    281 				ln_state = LN_UL_XB;
    282 				goto ln_ul_xb_case;
    283 			}
    284 ln_bo_xb_case:
    285 			curr -= 2;
    286 			ln_state = LN_BOLDFACE;
    287 			break;
    288 		case LN_UNDERLINE:
    289 			if (column + ue_width + bo_width + 1 + be_width >= sc_width)
    290 				/*
    291 				 * We have just barely enough room to
    292 				 * exit underline mode and handle a possible
    293 				 * underline/boldface run on mixup.
    294 				 */
    295 				return (1);
    296 			ln_state = LN_UL_X;
    297 			break;
    298 		case LN_BOLDFACE:
    299 			if (c == '\b')
    300 			{
    301 				ln_state = LN_BO_XB;
    302 				break;
    303 			}
    304 			if (column + be_width + ul_width + 1 + ue_width >= sc_width)
    305 				/*
    306 				 * We have just barely enough room to
    307 				 * exit underline mode and handle a possible
    308 				 * underline/boldface run on mixup.
    309 				 */
    310 				return (1);
    311 			ln_state = LN_BO_X;
    312 			break;
    313 		case LN_UL_X:
    314 			if (c == '\b')
    315 				ln_state = LN_UL_XB;
    316 			else
    317 			{
    318 				/*
    319 				 * Exit underline mode.
    320 				 * We have to shuffle the chars a bit
    321 				 * to make this work.
    322 				 */
    323 				curr[0] = curr[-1];
    324 				curr[-1] = UE_CHAR;
    325 				column += ue_width;
    326 				if (ue_width > 0 && curr[0] == ' ')
    327 					/*
    328 					 * Another special case for magic
    329 					 * cookie terminals: if the next
    330 					 * char is a space, replace it
    331 					 * with the "exit underline" sequence.
    332 					 */
    333 					column--;
    334 				else
    335 					curr++;
    336 				ln_state = LN_NORMAL;
    337 			}
    338 			break;
    339 		case LN_BO_X:
    340 			if (c == '\b')
    341 				ln_state = LN_BO_XB;
    342 			else
    343 			{
    344 				/*
    345 				 * Exit boldface mode.
    346 				 * We have to shuffle the chars a bit
    347 				 * to make this work.
    348 				 */
    349 				curr[0] = curr[-1];
    350 				curr[-1] = BE_CHAR;
    351 				column += be_width;
    352 				if (be_width > 0 && curr[0] == ' ')
    353 					/*
    354 					 * Another special case for magic
    355 					 * cookie terminals: if the next
    356 					 * char is a space, replace it
    357 					 * with the "exit boldface" sequence.
    358 					 */
    359 					column--;
    360 				else
    361 					curr++;
    362 				ln_state = LN_NORMAL;
    363 			}
    364 			break;
    365 		}
    366 	}
    367 
    368 	if (c == '\t') {
    369 		/*
    370 		 * Expand a tab into spaces.
    371 		 */
    372 		do {
    373 			NEW_COLUMN(1);
    374 		} while ((column % tabstop) != 0);
    375 		*curr++ = '\t';
    376 		return (0);
    377 	}
    378 
    379 	if (c == '\b') {
    380 		if (ln_state == LN_NORMAL)
    381 			NEW_COLUMN(2);
    382 		else
    383 			column--;
    384 		*curr++ = ('H' | 0200);
    385 		return(0);
    386 	}
    387 
    388 	if (CONTROL_CHAR(c)) {
    389 		/*
    390 		 * Put a "^X" into the buffer.  The 0200 bit is used to tell
    391 		 * put_line() to prefix the char with a ^.  We don't actually
    392 		 * put the ^ in the buffer because we sometimes need to move
    393 		 * chars around, and such movement might separate the ^ from
    394 		 * its following character.
    395 		 */
    396 		NEW_COLUMN(2);
    397 		*curr++ = (CARAT_CHAR(c) | 0200);
    398 		return(0);
    399 	}
    400 
    401 	/*
    402 	 * Ordinary character.  Just put it in the buffer.
    403 	 */
    404 	NEW_COLUMN(1);
    405 	*curr++ = c;
    406 	return (0);
    407 }
    408 
    409 /*
    410  * Analogous to forw_line(), but deals with "raw lines":
    411  * lines which are not split for screen width.
    412  * {{ This is supposed to be more efficient than forw_line(). }}
    413  */
    414 off_t
    415 forw_raw_line(curr_pos)
    416 	off_t curr_pos;
    417 {
    418 	char *p;
    419 	int c;
    420 	off_t new_pos;
    421 
    422 	if (curr_pos == NULL_POSITION || ch_seek(curr_pos) ||
    423 		(c = ch_forw_get()) == EOI)
    424 		return (NULL_POSITION);
    425 
    426 	p = linebuf;
    427 
    428 	for (;;)
    429 	{
    430 		if (c == '\n' || c == EOI)
    431 		{
    432 			new_pos = ch_tell();
    433 			break;
    434 		}
    435 		if (p >= &linebuf[sizeof(linebuf)-1])
    436 		{
    437 			/*
    438 			 * Overflowed the input buffer.
    439 			 * Pretend the line ended here.
    440 			 * {{ The line buffer is supposed to be big
    441 			 *    enough that this never happens. }}
    442 			 */
    443 			new_pos = ch_tell() - 1;
    444 			break;
    445 		}
    446 		*p++ = c;
    447 		c = ch_forw_get();
    448 	}
    449 	*p = '\0';
    450 	line = linebuf;
    451 	return (new_pos);
    452 }
    453 
    454 /*
    455  * Analogous to back_line(), but deals with "raw lines".
    456  * {{ This is supposed to be more efficient than back_line(). }}
    457  */
    458 off_t
    459 back_raw_line(curr_pos)
    460 	off_t curr_pos;
    461 {
    462 	char *p;
    463 	int c;
    464 	off_t new_pos;
    465 
    466 	if (curr_pos == NULL_POSITION || curr_pos <= (off_t)0 ||
    467 		ch_seek(curr_pos-1))
    468 		return (NULL_POSITION);
    469 
    470 	p = &linebuf[sizeof(linebuf)];
    471 	*--p = '\0';
    472 
    473 	for (;;)
    474 	{
    475 		c = ch_back_get();
    476 		if (c == '\n')
    477 		{
    478 			/*
    479 			 * This is the newline ending the previous line.
    480 			 * We have hit the beginning of the line.
    481 			 */
    482 			new_pos = ch_tell() + 1;
    483 			break;
    484 		}
    485 		if (c == EOI)
    486 		{
    487 			/*
    488 			 * We have hit the beginning of the file.
    489 			 * This must be the first line in the file.
    490 			 * This must, of course, be the beginning of the line.
    491 			 */
    492 			new_pos = (off_t)0;
    493 			break;
    494 		}
    495 		if (p <= linebuf)
    496 		{
    497 			/*
    498 			 * Overflowed the input buffer.
    499 			 * Pretend the line ended here.
    500 			 */
    501 			new_pos = ch_tell() + 1;
    502 			break;
    503 		}
    504 		*--p = c;
    505 	}
    506 	line = p;
    507 	return (new_pos);
    508 }
    509