Home | History | Annotate | Line # | Download | only in indent
pr_comment.c revision 1.163
      1 /*	$NetBSD: pr_comment.c,v 1.163 2023/06/14 08:36:51 rillig Exp $	*/
      2 
      3 /*-
      4  * SPDX-License-Identifier: BSD-4-Clause
      5  *
      6  * Copyright (c) 1985 Sun Microsystems, Inc.
      7  * Copyright (c) 1980, 1993
      8  *	The Regents of the University of California.  All rights reserved.
      9  * All rights reserved.
     10  *
     11  * Redistribution and use in source and binary forms, with or without
     12  * modification, are permitted provided that the following conditions
     13  * are met:
     14  * 1. Redistributions of source code must retain the above copyright
     15  *    notice, this list of conditions and the following disclaimer.
     16  * 2. Redistributions in binary form must reproduce the above copyright
     17  *    notice, this list of conditions and the following disclaimer in the
     18  *    documentation and/or other materials provided with the distribution.
     19  * 3. All advertising materials mentioning features or use of this software
     20  *    must display the following acknowledgement:
     21  *	This product includes software developed by the University of
     22  *	California, Berkeley and its contributors.
     23  * 4. Neither the name of the University nor the names of its contributors
     24  *    may be used to endorse or promote products derived from this software
     25  *    without specific prior written permission.
     26  *
     27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     37  * SUCH DAMAGE.
     38  */
     39 
     40 #include <sys/cdefs.h>
     41 __RCSID("$NetBSD: pr_comment.c,v 1.163 2023/06/14 08:36:51 rillig Exp $");
     42 
     43 #include <string.h>
     44 
     45 #include "indent.h"
     46 
     47 static void
     48 com_add_char(char ch)
     49 {
     50 	buf_add_char(&com, ch);
     51 }
     52 
     53 static void
     54 com_add_delim(void)
     55 {
     56 	if (opt.star_comment_cont)
     57 		buf_add_chars(&com, " * ", 3);
     58 }
     59 
     60 static bool
     61 fits_in_one_line(int com_ind, int max_line_length)
     62 {
     63 	for (const char *start = inp_p, *p = start; *p != '\n'; p++) {
     64 		if (p[0] == '*' && p[1] == '/') {
     65 			while (p - inp_p >= 2
     66 			    && ch_isblank(p[-1])
     67 			    && ch_isblank(p[-2]))
     68 				p--;
     69 			int len = ind_add(com_ind + 3,
     70 			    start, (size_t)(p - start));
     71 			len += p == start || ch_isblank(p[-1]) ? 2 : 3;
     72 			return len <= max_line_length;
     73 		}
     74 	}
     75 	return false;
     76 }
     77 
     78 static void
     79 analyze_comment(bool *p_may_wrap, bool *p_delim,
     80     int *p_ind, int *p_line_length)
     81 {
     82 	bool may_wrap = true;
     83 	bool delim = false;
     84 	int ind;
     85 	int line_length = opt.max_line_length;
     86 
     87 	if (inp_p - inp.s == 2 && !opt.format_col1_comments) {
     88 		may_wrap = false;
     89 		ind = 0;
     90 	} else {
     91 		if (inp_p[0] == '-' || inp_p[0] == '*' ||
     92 		    token.s[token.len - 1] == '/' ||
     93 		    (inp_p[0] == '\n' && !opt.format_block_comments))
     94 			may_wrap = false;
     95 		if (code.len == 0 && inp_p[strspn(inp_p, "*")] == '\n')
     96 			out.line_kind = lk_block_comment;
     97 
     98 		if (com.len > 0)
     99 			output_line();
    100 		if (lab.len == 0 && code.len == 0) {
    101 			ind = (ps.ind_level - opt.unindent_displace)
    102 			    * opt.indent_size;
    103 			if (ind <= 0)
    104 				ind = opt.format_col1_comments ? 0 : 1;
    105 			line_length = opt.block_comment_max_line_length;
    106 			if (may_wrap && inp_p[0] == '\n')
    107 				delim = true;
    108 			if (may_wrap && opt.comment_delimiter_on_blank_line)
    109 				delim = true;
    110 		} else {
    111 			int target_ind = code.len > 0
    112 			    ? ind_add(compute_code_indent(), code.s, code.len)
    113 			    : ind_add(compute_label_indent(), lab.s, lab.len);
    114 
    115 			ind = ps.line_has_decl || ps.ind_level == 0
    116 			    ? opt.decl_comment_column - 1
    117 			    : opt.comment_column - 1;
    118 			if (ind <= target_ind)
    119 				ind = next_tab(target_ind);
    120 			if (ind + 25 > line_length)
    121 				line_length = ind + 25;
    122 		}
    123 	}
    124 
    125 	ps.com_ind = ind;
    126 
    127 	if (!may_wrap) {
    128 		/* Find out how much indentation there was originally, because
    129 		 * that much will have to be ignored by output_line. */
    130 		size_t len = (size_t)(inp_p - 2 - inp.s);
    131 		ps.n_comment_delta = -ind_add(0, inp.s, len);
    132 	} else {
    133 		ps.n_comment_delta = 0;
    134 		if (!(inp_p[0] == '\t' && !ch_isblank(inp_p[1])))
    135 			while (ch_isblank(inp_p[0]))
    136 				inp_p++;
    137 	}
    138 
    139 	*p_may_wrap = may_wrap;
    140 	*p_delim = delim;
    141 	*p_ind = ind;
    142 	*p_line_length = line_length;
    143 }
    144 
    145 static void
    146 copy_comment_start(bool may_wrap, bool *delim, int ind, int line_length)
    147 {
    148 	ps.comment_delta = 0;
    149 	com_add_char('/');
    150 	com_add_char(token.s[token.len - 1]);	/* either '*' or '/' */
    151 
    152 	if (may_wrap) {
    153 		if (!ch_isblank(inp_p[0]))
    154 			com_add_char(' ');
    155 
    156 		if (*delim && fits_in_one_line(ind, line_length))
    157 			*delim = false;
    158 		if (*delim) {
    159 			output_line();
    160 			com_add_delim();
    161 		}
    162 	}
    163 }
    164 
    165 static void
    166 copy_comment_wrap_text(int line_length, ssize_t *last_blank)
    167 {
    168 	int now_len = ind_add(ps.com_ind, com.s, com.len);
    169 	for (;;) {
    170 		char ch = inp_next();
    171 		if (ch_isblank(ch))
    172 			*last_blank = (ssize_t)com.len;
    173 		com_add_char(ch);
    174 		now_len++;
    175 		if (memchr("*\n\r\b\t", inp_p[0], 6) != NULL)
    176 			break;
    177 		if (now_len >= line_length && *last_blank != -1)
    178 			break;
    179 	}
    180 
    181 	if (now_len <= line_length)
    182 		return;
    183 	if (ch_isspace(com.s[com.len - 1]))
    184 		return;
    185 
    186 	if (*last_blank == -1) {
    187 		/* only a single word in this line */
    188 		output_line();
    189 		com_add_delim();
    190 		return;
    191 	}
    192 
    193 	const char *last_word_s = com.s + *last_blank + 1;
    194 	size_t last_word_len = com.len - (size_t)(*last_blank + 1);
    195 	com.len = (size_t)*last_blank;
    196 	output_line();
    197 	com_add_delim();
    198 
    199 	/* Assume that output_line and com_add_delim don't invalidate the
    200 	 * "unused" part of the buffer beyond com.s + com.len. */
    201 	memmove(com.s + com.len, last_word_s, last_word_len);
    202 	com.len += last_word_len;
    203 	*last_blank = -1;
    204 }
    205 
    206 static bool
    207 copy_comment_wrap_newline(ssize_t *last_blank, bool *seen_newline)
    208 {
    209 	*last_blank = -1;
    210 	if (*seen_newline) {
    211 		if (com.len == 0)
    212 			com_add_char(' ');	/* force empty output line */
    213 		if (com.len > 3) {
    214 			output_line();
    215 			com_add_delim();
    216 		}
    217 		output_line();
    218 		com_add_delim();
    219 	} else {
    220 		*seen_newline = true;
    221 		if (!(com.len > 0 && ch_isblank(com.s[com.len - 1])))
    222 			com_add_char(' ');
    223 		*last_blank = (int)com.len - 1;
    224 	}
    225 	++line_no;
    226 
    227 	/* flush any blanks and/or tabs at start of next line */
    228 	inp_skip();		/* '\n' */
    229 	while (ch_isblank(inp_p[0]))
    230 		inp_p++;
    231 	if (inp_p[0] == '*' && inp_p[1] == '/')
    232 		return false;
    233 	if (inp_p[0] == '*') {
    234 		inp_p++;
    235 		while (ch_isblank(inp_p[0]))
    236 			inp_p++;
    237 	}
    238 
    239 	return true;
    240 }
    241 
    242 static void
    243 copy_comment_wrap_finish(int line_length, bool delim)
    244 {
    245 	if (delim) {
    246 		if (com.len > 3)
    247 			output_line();
    248 		else
    249 			buf_clear(&com);
    250 		com_add_char(' ');
    251 	} else {
    252 		size_t len = com.len;
    253 		while (ch_isblank(com.s[len - 1]))
    254 			len--;
    255 		int end_ind = ind_add(ps.com_ind, com.s, len);
    256 		if (end_ind + 3 > line_length)
    257 			output_line();
    258 	}
    259 
    260 	while (com.len >= 2
    261 	    && ch_isblank(com.s[com.len - 1])
    262 	    && ch_isblank(com.s[com.len - 2]))
    263 		com.len--;
    264 	buf_terminate(&com);
    265 
    266 	inp_p += 2;
    267 	if (com.len > 0 && ch_isblank(com.s[com.len - 1]))
    268 		buf_add_chars(&com, "*/", 2);
    269 	else
    270 		buf_add_chars(&com, " */", 3);
    271 }
    272 
    273 /*
    274  * Copy characters from 'inp' to 'com'. Try to keep comments from going over
    275  * the maximum line length. To do that, remember where the last blank, tab, or
    276  * newline was. When a line is filled, print up to the last blank and continue
    277  * copying.
    278  */
    279 static void
    280 copy_comment_wrap(int line_length, bool delim)
    281 {
    282 	ssize_t last_blank = -1;	/* index of the last blank in 'com' */
    283 	bool seen_newline = false;
    284 
    285 	for (;;) {
    286 		if (inp_p[0] == '\n') {
    287 			if (had_eof)
    288 				goto unterminated_comment;
    289 			if (!copy_comment_wrap_newline(&last_blank,
    290 			    &seen_newline))
    291 				goto end_of_comment;
    292 		} else if (inp_p[0] == '*' && inp_p[1] == '/')
    293 			goto end_of_comment;
    294 		else {
    295 			copy_comment_wrap_text(line_length, &last_blank);
    296 			seen_newline = false;
    297 		}
    298 	}
    299 
    300 end_of_comment:
    301 	copy_comment_wrap_finish(line_length, delim);
    302 	return;
    303 
    304 unterminated_comment:
    305 	diag(1, "Unterminated comment");
    306 	output_line();
    307 }
    308 
    309 static void
    310 copy_comment_nowrap(void)
    311 {
    312 	char kind = token.s[token.len - 1];
    313 
    314 	for (;;) {
    315 		if (inp_p[0] == '\n') {
    316 			if (kind == '/')
    317 				return;
    318 
    319 			if (had_eof) {
    320 				diag(1, "Unterminated comment");
    321 				output_line();
    322 				return;
    323 			}
    324 
    325 			if (com.len == 0)
    326 				com_add_char(' ');	/* force output of an
    327 							 * empty line */
    328 			output_line();
    329 			++line_no;
    330 			inp_skip();
    331 			continue;
    332 		}
    333 
    334 		com_add_char(*inp_p++);
    335 		if (com.len >= 2
    336 		    && com.s[com.len - 2] == '*'
    337 		    && com.s[com.len - 1] == '/'
    338 		    && kind == '*')
    339 			return;
    340 	}
    341 }
    342 
/*
 * Scan, reformat and output a single comment, which is either a block
 * comment starting with '/' '*' or an end-of-line comment starting with
 * '//'.
 *
 * The comment is analyzed first, then its opening delimiter is copied, and
 * finally its text is copied, either re-wrapped or unchanged.
 */
void
process_comment(void)
{
	bool may_wrap, delim;
	int ind, line_length;

	analyze_comment(&may_wrap, &delim, &ind, &line_length);
	copy_comment_start(may_wrap, &delim, ind, line_length);

	if (!may_wrap) {
		copy_comment_nowrap();
		return;
	}
	copy_comment_wrap(line_length, delim);
}
    360