Home | History | Annotate | Line # | Download | only in efi
      1 /* $NetBSD: utils.c,v 1.4 2026/01/06 10:54:41 nia Exp $ */
      2 
      3 /*
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1. Redistributions of source code must retain the above copyright
      8  *    notice, this list of conditions and the following disclaimer.
      9  * 2. Redistributions in binary form must reproduce the above copyright
     10  *    notice, this list of conditions and the following disclaimer in the
     11  *    documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
     14  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     15  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     16  * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     19  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     23  * SUCH DAMAGE.
     24  */
     25 
     26 #include <sys/cdefs.h>
     27 #ifndef lint
     28 __RCSID("$NetBSD: utils.c,v 1.4 2026/01/06 10:54:41 nia Exp $");
     29 #endif /* not lint */
     30 
     31 #include <sys/endian.h>
     32 #include <sys/uuid.h>
     33 
     34 #include <assert.h>
     35 #include <ctype.h>
     36 #include <err.h>
     37 #include <errno.h>
     38 #include <inttypes.h>
     39 #include <limits.h>
     40 #include <stdio.h>
     41 #include <stdlib.h>
     42 #include <string.h>
     43 #include <fcntl.h>
     44 #include <unistd.h>
     45 #include <util.h>
     46 
     47 #include "defs.h"
     48 #include "utils.h"
     49 
     50 #define UCS2_REPLACEMENT_CHARACTER	0xfffd
     51 
     52 /************************************************************************
 * Character encoding conversion routines
     54  *
     55  * UEFI uses UCS-2 character encoding.  Note that this is not UTF-16
     56  * as it doesn't interpret surrogate pairs.
     57  *
     58  * From <https://www.unicode.org/faq/utf_bom.html#utf16-11>:
     59  *
     60  * "UCS-2 is obsolete terminology which refers to a Unicode
     61  * implementation up to Unicode 1.1, before surrogate code points and
     62  * UTF-16 were added to Version 2.0 of the standard. This term should
     63  * now be avoided.
     64  *
     65  * UCS-2 does not describe a data format distinct from UTF-16, because
     66  * both use exactly the same 16-bit code unit
     67  * representations. However, UCS-2 does not interpret surrogate code
     68  * points, and thus cannot be used to conformantly represent
     69  * supplementary characters.
     70  *
     71  * Sometimes in the past an implementation has been labeled "UCS-2" to
     72  * indicate that it does not support supplementary characters and
     73  * doesn't interpret pairs of surrogate code points as
     74  * characters. Such an implementation would not handle processing of
     75  * character properties, code point boundaries, collation, etc. for
     76  * supplementary characters, nor would it be able to support most
     77  * emoji, for example. [AF]"
     78  *
     79  * Regarding illegal UTF-8 sequences, the same document says:
     80  *
     81  * "None of the UTFs can generate every arbitrary byte sequence. For
     82  * example, in UTF-8 every byte of the form 110xxxxx_2 must be
     83  * followed with a byte of the form 10xxxxxx_2. A sequence such as
     84  * <110xxxxx_2 0xxxxxxx_2> is illegal, and must never be
     85  * generated. When faced with this illegal byte sequence while
     86  * transforming or interpreting, a UTF-8 conformant process must treat
     87  * the first byte 110xxxxx_2 as an illegal termination error: for
     88  * example, either signaling an error, filtering the byte out, or
     89  * representing the byte with a marker such as U+FFFD REPLACEMENT
     90  * CHARACTER. In the latter two cases, it will continue processing at
     91  * the second byte 0xxxxxxx_2.
     92  *
     93  * A conformant process must not interpret illegal or ill-formed byte
     94  * sequences as characters, however, it may take error recovery
     95  * actions. No conformant process may use irregular byte sequences to
     96  * encode out-of-band information."
     97  */
     98 
     99 /*
    100  * ibuf  = input buffer (uint16_t *)
    101  * isz   = bytes in input buffer
    102  * obuf  = output buffer (char *)
    103  * osz   = bytes in output buffer (size_t *)
    104  *
    105  * if (obuf == NULL), malloc obuf.
    106  * if (obuf != NULL), write to existing output buffer.
    107  *
    108  * Return resulting utf8 string.
    109  */
    110 PUBLIC char *
    111 ucs2_to_utf8(const uint16_t *ibuf, size_t isz, char *obuf, size_t *osz)
    112 {
    113 	uint8_t *dst;
    114 	size_t dsz, i, j, j_max, n;
    115 	uint16_t c;
    116 
    117 	assert(isz > 0);
    118 
    119 	if (obuf != NULL) {
    120 		assert(osz != NULL);
    121 		dsz = *osz;
    122 		dst = (uint8_t *)obuf;
    123 	}
    124 	else {
    125 		dsz = isz * sizeof(*dst);
    126 		dst = malloc(dsz);
    127 	}
    128 
    129 	/*
    130 	 * Each UCS2 character will encode as at most 3 UTF8 bytes.
    131 	 * 'isz' is the number of bytes in the source buffer which
    132 	 * may well be larger than the UCS2 string.  'osz' is the
    133 	 * actual number of bytes in the NUL terminated USC2 string.
    134 	 */
    135 	n = isz / sizeof(*ibuf); /* max number of characters in input buffer */
    136 	j = 0;
    137 	j_max = dsz / sizeof(*dst);
    138 	for (i = 0; i < n; i++) {
    139 		c = le16toh(ibuf[i]);
    140 		if (c == 0) {
    141 			break;
    142 		}
    143 		if (c < 0x0080) {
    144 			if (j + 1 >= j_max)
    145 				break;
    146 			dst[j++] = (uint8_t)c;
    147 		}
    148 		else if (c < 0x0800) {
    149 			if (j + 2 >= j_max)
    150 				break;
    151 			dst[j++] = 0xc0 | (uint8_t)(c >> 6);
    152 			dst[j++] = 0x80 | (uint8_t)(c & 0x3f);
    153 		}
    154 		else {
    155 			if (j + 3 >= j_max)
    156 				break;
    157 			dst[j++] = 0xe0 | (uint8_t)(c >> 12);
    158 			dst[j++] = 0x80 | (uint8_t)((c >> 6) & 0x3f);
    159 			dst[j++] = 0x80 | (uint8_t)(c & 0x3f);
    160 		}
    161 	}
    162 	if (dst != NULL)
    163 		dst[j] = '\0';
    164 
    165 	if (osz)
    166 		*osz = j;
    167 
    168 	return (char *)dst;
    169 }
    170 
    171 /*
    172  * ibuf  = input buffer (char *)
    173  * isz   = bytes in input buffer
    174  * obuf  = output buffer (uint16_t *)
    175  * osz   = bytes in output buffer (size_t *)
    176  *
    177  * if (obuf == NULL), malloc obuf.
    178  * if (obuf != NULL), write to existing output buffer.
    179  *
    180  * Return resulting ucs2 string.
    181  */
    182 PUBLIC uint16_t *
    183 utf8_to_ucs2(const char *ibuf, size_t isz, uint16_t *obuf, size_t *osz)
    184 {
    185 	const uint8_t *src = (const uint8_t *)ibuf;
    186 	uint16_t *dst;
    187 	uint16_t out;
    188 	size_t dsz, i, j, j_max;
    189 
    190 	if (obuf != NULL) {
    191 		assert(osz != NULL);
    192 		dst = obuf;
    193 		dsz = *osz;
    194 	}
    195 	else {
    196 		dsz = isz * sizeof(*dst);
    197 		dst = malloc(dsz);
    198 	}
    199 
    200 	j = 0;
    201 	j_max = dsz / sizeof(*dst);
    202 	for (i = 0; i < isz; i++) {
    203 		out = src[i];
    204 		if (out == '\0') {
    205 			break;
    206 		}
    207 		else if (j + 1 >= j_max) {
    208 			break;
    209 		}
    210 		else if ((out & 0x80) == 0) {
    211 			/* we're good to go */
    212 		}
    213 		else if ((out & 0xe0) == 0xc0) {
    214 			if (i + 1 >= isz) { /* insufficient source */
    215 				break;
    216 			}
    217 			if ((src[i + 1] & 0xc0) != 0x80) {
    218 				out = UCS2_REPLACEMENT_CHARACTER;
    219 			}
    220 			else {
    221 				out &= 0x1f;
    222 				out <<= 6;
    223 				out |= src[++i] & 0x3f;
    224 			}
    225 		}
    226 		else if ((out & 0xf0) == 0xe0) {
    227 			if (i + 2 >= isz) { /* insufficient source */
    228 				break;
    229 			}
    230 			if ((src[i + 1] & 0xc0) != 0x80 ||
    231 			    (src[i + 2] & 0xc0) != 0x80) {
    232 				out = UCS2_REPLACEMENT_CHARACTER;
    233 			}
    234 			else {
    235 				out &= 0x0f;
    236 				out <<= 6;
    237 				out |= src[++i] & 0x3f;
    238 				out <<= 6;
    239 				out |= src[++i] & 0x3f;
    240 			}
    241 		}
    242 		else {	/* cannot encode as USC2 */
    243 			out = UCS2_REPLACEMENT_CHARACTER;
    244 		}
    245 		dst[j++] = htole16(out);
    246 	}
    247 	dst[j] = '\0';
    248 
    249 	if (src[i] != '\0')
    250 		warnx("bad UTF8 string");
    251 
    252 	if (osz)
    253 		*osz = (j + 1) * sizeof(*dst);
    254 	return dst;
    255 }
    256 
    257 PUBLIC size_t
    258 utf8_to_ucs2_size(const char *src)
    259 {
    260 #if 0
    261 	uint16_t *dst;
    262 	size_t sz;
    263 
    264 	dst = utf8_to_ucs2(src, strlen(src) + 1, NULL, &sz);
    265 	free(dst);
    266 	return sz;
    267 #else
    268 	const uint8_t *buf = (const uint8_t *)src;
    269 	uint out;
    270 	size_t i, j;
    271 
    272 	j = 0;
    273 	for (i = 0; (out = buf[i]) != '\0'; i++) {
    274 		if ((out & 0x80) == 0) {
    275 			/* we're good to go */
    276 		}
    277 		else if ((out & 0xe0) == 0xc0) {
    278 			if ((buf[i + 1] & 0xc0) == 0x80) {
    279 				i++;
    280 			}
    281 		}
    282 		else if ((out & 0xf0) == 0xe0) {
    283 			if ((buf[i + 1] & 0xc0) == 0x80 &&
    284 			    (buf[i + 2] & 0xc0) == 0x80) {
    285 				i += 2;
    286 			}
    287 		}
    288 		j++;
    289 	}
    290 
    291 	return (j + 1) * sizeof(uint16_t);
    292 #endif
    293 }
    294 
    295 /************************************************************************
    296  * UUID routines
    297  */
    298 PUBLIC int
    299 uuid_scanf(struct uuid *uuid, const char *str)
    300 {
    301 
    302 	return sscanf(str,
    303 	    "%08x-%04hx-%04hx-%02hhx%02hhx-%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx",
    304 	    &uuid->time_low, &uuid->time_mid, &uuid->time_hi_and_version,
    305 	    &uuid->clock_seq_hi_and_reserved, &uuid->clock_seq_low,
    306 	    &uuid->node[0], &uuid->node[1], &uuid->node[2], &uuid->node[3],
    307 	    &uuid->node[4], &uuid->node[5]);
    308 }
    309 
    310 /*
    311  * from sys/kern/kern_uuid.c
    312  */
    313 PUBLIC int
    314 uuid_snprintf(char *buf, size_t sz, const struct uuid *uuid)
    315 {
    316 
    317 	return snprintf(buf, sz,
    318 	    "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
    319 	    uuid->time_low, uuid->time_mid, uuid->time_hi_and_version,
    320 	    uuid->clock_seq_hi_and_reserved, uuid->clock_seq_low,
    321 	    uuid->node[0], uuid->node[1], uuid->node[2], uuid->node[3],
    322 	    uuid->node[4], uuid->node[5]);
    323 }
    324 
    325 PUBLIC int
    326 uuid_printf(const struct uuid *uuid)
    327 {
    328 	char buf[UUID_STR_LEN];
    329 
    330 	(void) uuid_snprintf(buf, sizeof(buf), uuid);
    331 	printf("%s", buf);
    332 	return (0);
    333 }
    334 
    335 /************************************************************************
    336  * Misc routines
    337  */
    338 
    339 PUBLIC void
    340 show_data(const uint8_t *buf, size_t len, const char *prefix)
    341 {
    342 	uint8_t line_buf[17];
    343 	size_t i;
    344 
    345 	line_buf[16] = '\0';
    346 	for (i = 0; i < len; i++) {
    347 		if ((i & 0xf) == 0)
    348 			printf("%s%08zx: ", prefix, i);
    349 		line_buf[i & 0xf] = isprint((int)buf[i]) ? buf[i] : '.';
    350 		printf("%02x ", buf[i]);
    351 		if ((i & 0xf) == 0xf)
    352 			printf("  %s\n", line_buf);
    353 		else if ((i & 0x7) == 0x7)
    354 			printf(" ");
    355 	}
    356 	i &= 0xf;
    357 	if (i != 0) {
    358 		line_buf[i] = '\0';
    359 		if (i < 8)
    360 			printf(" ");
    361 		while (i++ < 16)
    362 			printf("   ");
    363 
    364 		printf("  %s\n", line_buf);
    365 	}
    366 }
    367 
    368 PUBLIC uint16_t
    369 strtous(const char *str, char **endptr, int base)
    370 {
    371 	uintmax_t val;
    372 	int rstatus;
    373 
    374 	val = strtou(str, endptr, base, 0, USHRT_MAX, &rstatus);
    375 
    376 	switch (rstatus) {
    377 	case EINVAL:
    378 		assert(0);
    379 		break;
    380 	case ENOTSUP:
    381 		if (endptr != NULL)
    382 			break;
    383 		/*FALLTHROUGH*/
    384 	case ECANCELED:
    385 		err(EXIT_FAILURE, "invalid numeric string: %s\n", str);
    386 	case ERANGE:
    387 		err(EXIT_FAILURE, "value out of range [0,%#x]: %s\n",
    388 		    USHRT_MAX, str);
    389 	default:
    390 		break;
    391 	}
    392 
    393 	return (uint16_t)val;
    394 }
    395 
/*
 * Read the whole of 'fname' into a freshly allocated buffer.
 *
 * fname = path of the file to read, or "-" for standard input
 * size  = if not NULL, receives the number of bytes in the returned
 *         buffer, including the NUL byte appended after the data
 *
 * Data is read until end-of-file; a short read is not taken to mean
 * EOF, since pipes and terminals routinely return partial reads.
 * The data is always followed by a single NUL byte, which is counted
 * in *size.
 *
 * Exits with a diagnostic on open/read failure.  The caller owns the
 * returned buffer and must free() it.
 */
char *
read_file(const char *fname, size_t *size)
{
	char *buf;
	size_t bufsz, cnt;
	ssize_t ssz;
	int fd, is_stdin;

	assert(fname != NULL);
	if (fname == NULL)
		return NULL;

	is_stdin = strcmp(fname, "-") == 0;
	if (is_stdin) {
		fd = STDIN_FILENO;
	}
	else {
		fd = open(fname, O_RDONLY);
		if (fd == -1)
			err(EXIT_FAILURE, "open");
	}

	bufsz = 0x800;
	buf = emalloc(bufsz);
	cnt = 0;
	for (;;) {
		/* Grow so that one byte always stays free for the NUL. */
		if (cnt + 1 >= bufsz) {
			bufsz *= 2;
			buf = erealloc(buf, bufsz);
		}

		ssz = read(fd, buf + cnt, bufsz - cnt - 1);
		if (ssz == -1) {
			/*
			 * EINTR: interrupted by a signal, just retry.
			 * EAGAIN: only possible if the descriptor was
			 * handed to us in non-blocking mode.
			 */
			if (errno == EINTR || errno == EAGAIN)
				continue;
			err(EXIT_FAILURE, "read");
		}
		if (ssz == 0)	/* end of file */
			break;

		cnt += (size_t)ssz;
	}

	/* XXX: what about UCS-2? */
	buf[cnt++] = '\0';

	/* stdin is not ours to close */
	if (!is_stdin)
		close(fd);

	if (size != NULL)
		*size = cnt;
	return buf;
}
    468 }
    469