Home | History | Annotate | Line # | Download | only in util
      1 /*	$NetBSD: quote_for_json.c,v 1.3 2026/05/09 18:49:23 christos Exp $	*/
      2 
      3 /*++
      4 /* NAME
      5 /*	quote_for_json 3
      6 /* SUMMARY
      7 /*	quote UTF-8 string value for JSON
      8 /* SYNOPSIS
      9 /*	#include <quote_for_json.h>
     10 /*
     11 /*	char	*quote_for_json(
     12 /*	VSTRING	*result,
     13 /*	const char *in,
     14 /*	ssize_t	len)
     15 /*
     16 /*	char	*quote_for_json_append(
     17 /*	VSTRING	*result,
     18 /*	const char *in,
     19 /*	ssize_t	len)
     20 /*
     21 /*	char	*quote_for_json_var(
     22 /*	VSTRING	*result,
     23 /*	const char *in)
     24 /* DESCRIPTION
     25 /*	quote_for_json() takes well-formed UTF-8 encoded text,
     26 /*	quotes that text compliant with RFC 4627, and returns a
     27 /*	pointer to the resulting text. The input may contain null
     28 /*	bytes, but the output will not.
     29 /*
     30 /*	quote_for_json() produces short (two-letter) escape sequences
     31 /*	for common control characters, double quote and backslash.
     32 /*	It will not quote "/" (0x2F), and will quote DEL (0x7f) as
     33 /*	\u007F to make it printable. The input byte sequence "\uXXXX"
     34 /*	is quoted like any other text (the "\" is escaped as "\\").
     35 /*
     36 /*	quote_for_json() does not perform UTF-8 validation. The caller
     37 /*	should use valid_utf8_string() or printable() as appropriate.
     38 /*
     39 /*	quote_for_json_append() appends the output to the result buffer.
     40 /*
     41 /*	quote_for_json_var() takes a null-terminated sequence of
     42 /*	null-terminated arguments and formats them with quote_for_json().
/*
     44 /*	Arguments:
     45 /* .IP result
     46 /*	Storage for the result, resized automatically.
     47 /* .IP in
     48 /*	Pointer to the input byte sequence.
     49 /* .IP len
     50 /*	The length of the input byte sequence, or a negative number
     51 /*	when the byte sequence is null-terminated.
     52 /* DIAGNOSTICS
     53 /*	Fatal error: memory allocation error.
     54 /* LICENSE
     55 /* .ad
     56 /* .fi
     57 /*	The Secure Mailer license must be distributed with this software.
     58 /* AUTHOR(S)
     59 /*	Wietse Venema
     60 /*	Google, Inc.
     61 /*	111 8th Avenue
     62 /*	New York, NY 10011, USA
     63 /*
     64 /*	Wietse Venema
     65 /*	porcupine.org
     66 /*--*/
     67 
     68  /*
     69   * System library.
     70   */
     71 #include <sys_defs.h>
     72 #include <ctype.h>
     73 #include <stdarg.h>
     74 #include <string.h>
     75 
     76  /*
     77   * Utility library.
     78   */
     79 #include <stringops.h>
     80 #include <vstring.h>
     81 
     82 #define STR(x) vstring_str(x)
     83 
     84 /* quote_for_json_append - quote JSON string, append result */
     85 
     86 char   *quote_for_json_append(VSTRING *result, const char *text, ssize_t len)
     87 {
     88     const char *cp;
     89     int     ch;
     90 
     91     if (len < 0)
     92 	len = strlen(text);
     93 
     94     for (cp = text; len > 0; len--, cp++) {
     95 	ch = *(const unsigned char *) cp;
     96 	if (UNEXPECTED(ISCNTRL(ch))) {
     97 	    switch (ch) {
     98 	    case '\b':
     99 		VSTRING_ADDCH(result, '\\');
    100 		VSTRING_ADDCH(result, 'b');
    101 		break;
    102 	    case '\f':
    103 		VSTRING_ADDCH(result, '\\');
    104 		VSTRING_ADDCH(result, 'f');
    105 		break;
    106 	    case '\n':
    107 		VSTRING_ADDCH(result, '\\');
    108 		VSTRING_ADDCH(result, 'n');
    109 		break;
    110 	    case '\r':
    111 		VSTRING_ADDCH(result, '\\');
    112 		VSTRING_ADDCH(result, 'r');
    113 		break;
    114 	    case '\t':
    115 		VSTRING_ADDCH(result, '\\');
    116 		VSTRING_ADDCH(result, 't');
    117 		break;
    118 	    default:
    119 		/* All other controls including DEL and NUL. */
    120 		vstring_sprintf_append(result, "\\u%04X", ch);
    121 		break;
    122 	    }
    123 	} else {
    124 	    switch (ch) {
    125 	    case '\\':
    126 	    case '"':
    127 		VSTRING_ADDCH(result, '\\');
    128 		/* FALLTHROUGH */
    129 	    default:
    130 		/* Includes malformed UTF-8. */
    131 		VSTRING_ADDCH(result, ch);
    132 		break;
    133 	    }
    134 	}
    135     }
    136     VSTRING_TERMINATE(result);
    137     return (STR(result));
    138 }
    139 
    140 /* quote_for_json - quote JSON string */
    141 
    142 char   *quote_for_json(VSTRING *result, const char *text, ssize_t len)
    143 {
    144     VSTRING_RESET(result);
    145     return (quote_for_json_append(result, text, len));
    146 }
    147 
    148 
    149 /* quote_for_json_var - quote null-terminated list of null-terminated strings */
    150 
    151 char   *quote_for_json_var(VSTRING *result,...)
    152 {
    153     const char *in;
    154     va_list ap;
    155 
    156     VSTRING_RESET(result);
    157     va_start(ap, result);
    158     while ((in = va_arg(ap, const char *)) != 0)
    159 	quote_for_json_append(result, in, -1);
    160     return (STR(result));
    161 }
    162 
    163 #ifdef TEST
    164 
    165  /*
    166   * System library.
    167   */
    168 #include <stdlib.h>
    169 
    170  /*
    171   * Utility library.
    172   */
    173 #include <argv.h>
    174 #include <msg.h>
    175 #include <msg_vstream.h>
    176 
 /*
  * One table-driven test case. The action function is the test driver; it
  * receives the test case itself and reads the union member that matches
  * it: run_fixed_test() reads u.fixed, run_variadic_test() reads
  * u.variadic. The field order matters, because the test table uses
  * positional initializers for label, action and exp_res.
  */
typedef struct TEST_CASE {
    const char *label;			/* identifies test case */
    /* Test driver; main() counts a non-zero result as a failure. */
    int     (*action) (const struct TEST_CASE *);
    union {
	/* Function that takes one input string plus a length. */
	struct {
	    char   *(*fn) (VSTRING *, const char *, ssize_t);
	    const char *input;		/* input string */
	    ssize_t input_len;		/* -1 or input length */
	}       fixed;
	/* Function that takes a null-terminated list of strings. */
	struct {
	    char   *(*fn) (VSTRING *,...);
	    /* Split with argv_split() into the individual arguments. */
	    const char *input;
	}       variadic;
    }       u;
    const char *exp_res;		/* expected result */
} TEST_CASE;
    193 
 /*
  * Symbolic test outcomes. NOTE(review): nothing in the visible code uses
  * these names; the test drivers return a plain 0 or 1 instead.
  */
#define PASS	(0)
#define FAIL	(1)

 /*
  * Result buffer that is shared by all test cases; main() allocates it. It
  * is not cleared between test cases: the "quote_for_json_append" test case
  * expects to find the output of the preceding test case in it.
  */
static VSTRING *res_buf;
    198 
    199 static int run_fixed_test(const TEST_CASE *tp)
    200 {
    201     int     test_fail = 0;
    202     char   *res;
    203 
    204     res = tp->u.fixed.fn(res_buf, tp->u.fixed.input, tp->u.fixed.input_len);
    205     if (strcmp(res, tp->exp_res) != 0) {
    206 	msg_warn("test case '%s': got '%s', want '%s'",
    207 		 tp->label, res, tp->exp_res);
    208 	test_fail = 1;
    209     }
    210     return (test_fail);
    211 }
    212 
    213 static int run_variadic_test(const TEST_CASE *tp)
    214 {
    215     int     test_fail = 0;
    216     char   *res;
    217     ARGV   *argv = argv_split(tp->u.variadic.input, CHARS_SPACE);
    218 
    219     res = tp->u.variadic.fn(res_buf, argv->argv[0], argv->argv[1],
    220 			    argv->argv[2], argv->argv[3]);
    221     if (strcmp(res, tp->exp_res) != 0) {
    222 	msg_warn("test case '%s': got '%s', want '%s'",
    223 		 tp->label, res, tp->exp_res);
    224 	test_fail = 1;
    225     }
    226     argv_free(argv);
    227     return (test_fail);
    228 }
    229 
 /*
  * The test cases. main() runs them in table order with one shared result
  * buffer, so the order matters: the "quote_for_json_append" test case
  * expects that the output of the preceding test case is still in the
  * buffer. The table ends with an entry that has a null label.
  */
static const TEST_CASE test_cases[] = {
    {"ordinary ASCII text", run_fixed_test,
	.u.fixed = {quote_for_json,
	" abcABC012.,[]{}/", -1}, " abcABC012.,[]{}/",
    },
    /* Appends "foo" to the result of the preceding test case. */
    {"quote_for_json_append", run_fixed_test,
	.u.fixed = {quote_for_json_append,
	"foo", -1}, " abcABC012.,[]{}/foo",
    },
    {"common control characters", run_fixed_test,
	.u.fixed = {quote_for_json,
	"\b\f\r\n\t", -1}, "\\b\\f\\r\\n\\t",
    },
    /* Explicit length 6, because the input starts with a null byte. */
    {"uncommon control characters and DEL", run_fixed_test,
	.u.fixed = {quote_for_json,
	"\0\01\037\040\176\177", 6}, "\\u0000\\u0001\\u001F ~\\u007F",
    },
    /* Each backslash is doubled; the byte 0x80 is copied unchanged. */
    {"malformed UTF-8", run_fixed_test,
	.u.fixed = {quote_for_json,
	"\\*\\uasd\\u007F\x80", -1}, "\\\\*\\\\uasd\\\\u007F\x80",
    },
    /* The input is split with argv_split(); the pieces are quoted in order. */
    {"multiple input strings", run_variadic_test,
	.u.variadic = {quote_for_json_var, "one - two"},
	"one-two",
    },
    /* Terminator: null label. */
    0,
};
    260 
    261 int     main(int argc, char **argv)
    262 {
    263     const TEST_CASE *tp;
    264     int     pass = 0;
    265     int     fail = 0;
    266 
    267     msg_vstream_init(sane_basename((VSTRING *) 0, argv[0]), VSTREAM_ERR);
    268     res_buf = vstring_alloc(100);
    269 
    270     for (tp = test_cases; tp->label != 0; tp++) {
    271 	int     test_fail = 0;
    272 
    273 	msg_info("RUN  %s", tp->label);
    274 	test_fail = tp->action(tp);
    275 	if (test_fail) {
    276 	    fail++;
    277 	    msg_info("FAIL %s", tp->label);
    278 	} else {
    279 	    msg_info("PASS %s", tp->label);
    280 	    pass++;
    281 	}
    282     }
    283     msg_info("PASS=%d FAIL=%d", pass, fail);
    284     exit(fail != 0);
    285 }
    286 
    287 #endif
    288