1 /* $NetBSD: citrus_zw.c,v 1.6 2022/04/19 20:32:14 rillig Exp $ */ 2 3 /*- 4 * Copyright (c)2004, 2006 Citrus Project, 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 */ 29 30 #include <sys/cdefs.h> 31 #if defined(LIB_SCCS) && !defined(lint) 32 __RCSID("$NetBSD: citrus_zw.c,v 1.6 2022/04/19 20:32:14 rillig Exp $"); 33 #endif /* LIB_SCCS and not lint */ 34 35 #include <sys/types.h> 36 #include <assert.h> 37 #include <errno.h> 38 #include <string.h> 39 #include <stdio.h> 40 #include <stdint.h> 41 #include <stdlib.h> 42 #include <stddef.h> 43 #include <wchar.h> 44 #include <limits.h> 45 46 #include "citrus_namespace.h" 47 #include "citrus_types.h" 48 #include "citrus_module.h" 49 #include "citrus_ctype.h" 50 #include "citrus_stdenc.h" 51 #include "citrus_zw.h" 52 53 /* ---------------------------------------------------------------------- 54 * private stuffs used by templates 55 */ 56 57 typedef struct { 58 int dummy; 59 } _ZWEncodingInfo; 60 61 typedef enum { 62 NONE, AMBIGIOUS, ASCII, GB2312 63 } _ZWCharset; 64 65 typedef struct { 66 int chlen; 67 char ch[4]; 68 _ZWCharset charset; 69 } _ZWState; 70 71 typedef struct { 72 _ZWEncodingInfo ei; 73 struct { 74 /* for future multi-locale facility */ 75 _ZWState s_mblen; 76 _ZWState s_mbrlen; 77 _ZWState s_mbrtowc; 78 _ZWState s_mbtowc; 79 _ZWState s_mbsrtowcs; 80 _ZWState s_mbsnrtowcs; 81 _ZWState s_wcrtomb; 82 _ZWState s_wcsrtombs; 83 _ZWState s_wcsnrtombs; 84 _ZWState s_wctomb; 85 } states; 86 } _ZWCTypeInfo; 87 88 #define _CEI_TO_EI(_cei_) (&(_cei_)->ei) 89 #define _CEI_TO_STATE(_cei_, _func_) (_cei_)->states.s_##_func_ 90 91 #define _FUNCNAME(m) _citrus_ZW_##m 92 #define _ENCODING_INFO _ZWEncodingInfo 93 #define _CTYPE_INFO _ZWCTypeInfo 94 #define _ENCODING_STATE _ZWState 95 #define _ENCODING_MB_CUR_MAX(_ei_) MB_LEN_MAX 96 #define _ENCODING_IS_STATE_DEPENDENT 1 97 #define _STATE_NEEDS_EXPLICIT_INIT(_ps_) ((_ps_)->charset != NONE) 98 99 static __inline void 100 /*ARGSUSED*/ 101 _citrus_ZW_init_state(_ZWEncodingInfo * __restrict ei, 102 _ZWState * __restrict psenc) 103 { 104 /* ei my be unused */ 105 _DIAGASSERT(psenc != NULL); 106 107 psenc->chlen = 0; 108 psenc->charset = NONE; 109 } 110 111 static __inline void 112 /*ARGSUSED*/ 113 _citrus_ZW_pack_state(_ZWEncodingInfo * __restrict ei, 114 void *__restrict pspriv, const _ZWState * __restrict psenc) 115 { 116 /* ei may be unused */ 117 _DIAGASSERT(pspriv != NULL); 118 _DIAGASSERT(psenc != NULL); 119 120 memcpy(pspriv, (const void *)psenc, sizeof(*psenc)); 121 } 122 123 static __inline void 124 /*ARGSUSED*/ 125 _citrus_ZW_unpack_state(_ZWEncodingInfo * __restrict ei, 126 _ZWState * __restrict psenc, const void * __restrict pspriv) 127 { 128 /* ei may be unused */ 129 _DIAGASSERT(psenc != NULL); 130 _DIAGASSERT(pspriv != NULL); 131 132 memcpy((void *)psenc, pspriv, sizeof(*psenc)); 133 } 134 135 static int 136 _citrus_ZW_mbrtowc_priv(_ZWEncodingInfo * __restrict ei, 137 wchar_t * __restrict pwc, const char **__restrict s, size_t n, 138 _ZWState * __restrict psenc, size_t * __restrict nresult) 139 { 140 const char *s0; 141 int ch, len; 142 wchar_t wc; 143 144 /* ei may be unused */ 145 /* pwc may be null */ 146 _DIAGASSERT(s != NULL); 147 _DIAGASSERT(psenc != NULL); 148 _DIAGASSERT(nresult != NULL); 149 150 if (*s == NULL) { 151 _citrus_ZW_init_state(ei, psenc); 152 *nresult = (size_t)_ENCODING_IS_STATE_DEPENDENT; 153 return 0; 154 } 155 s0 = *s; 156 len = 0; 157 158 #define STORE \ 159 do { \ 160 if (n-- < 1) { \ 161 *nresult = (size_t)-2; \ 162 *s = s0; \ 163 return 0; \ 164 } \ 165 ch = (unsigned char)*s0++; \ 166 if (len++ > MB_LEN_MAX || ch > 0x7F)\ 167 goto ilseq; \ 168 psenc->ch[psenc->chlen++] = ch; \ 169 } while (0) 170 171 loop: 172 switch (psenc->charset) { 173 case ASCII: 174 switch (psenc->chlen) { 175 case 0: 176 STORE; 177 switch (psenc->ch[0]) { 178 case '\0': case '\n': 179 psenc->charset = NONE; 180 } 181 /*FALLTHROUGH*/ 182 case 1: 183 break; 184 default: 185 return EINVAL; 186 } 187 ch = (unsigned char)psenc->ch[0]; 188 if (ch > 0x7F) 189 goto ilseq; 190 wc = (wchar_t)ch; 191 psenc->chlen = 0; 192 break; 193 case NONE: 194 if (psenc->chlen != 0) 195 return EINVAL; 196 STORE; 197 ch = (unsigned char)psenc->ch[0]; 198 if (ch != 'z') { 199 if (ch != '\n' && ch != '\0') 200 psenc->charset = ASCII; 201 wc = (wchar_t)ch; 202 psenc->chlen = 0; 203 break; 204 } 205 psenc->charset = AMBIGIOUS; 206 psenc->chlen = 0; 207 /* FALLTHROUGH */ 208 case AMBIGIOUS: 209 if (psenc->chlen != 0) 210 return EINVAL; 211 STORE; 212 if (psenc->ch[0] != 'W') { 213 psenc->charset = ASCII; 214 wc = L'z'; 215 break; 216 } 217 psenc->charset = GB2312; 218 psenc->chlen = 0; 219 /* FALLTHROUGH */ 220 case GB2312: 221 switch (psenc->chlen) { 222 case 0: 223 STORE; 224 ch = (unsigned char)psenc->ch[0]; 225 if (ch == '\0') { 226 psenc->charset = NONE; 227 wc = (wchar_t)ch; 228 psenc->chlen = 0; 229 break; 230 } else if (ch == '\n') { 231 psenc->charset = NONE; 232 psenc->chlen = 0; 233 goto loop; 234 } 235 /*FALLTHROUGH*/ 236 case 1: 237 STORE; 238 if (psenc->ch[0] == ' ') { 239 ch = (unsigned char)psenc->ch[1]; 240 wc = (wchar_t)ch; 241 psenc->chlen = 0; 242 break; 243 } else if (psenc->ch[0] == '#') { 244 ch = (unsigned char)psenc->ch[1]; 245 if (ch == '\n') { 246 psenc->charset = NONE; 247 wc = (wchar_t)ch; 248 psenc->chlen = 0; 249 break; 250 } else if (ch == ' ') { 251 wc = (wchar_t)ch; 252 psenc->chlen = 0; 253 break; 254 } 255 } 256 ch = (unsigned char)psenc->ch[0]; 257 if (ch < 0x21 || ch > 0x7E) 258 goto ilseq; 259 wc = (wchar_t)(ch << 8); 260 ch = (unsigned char)psenc->ch[1]; 261 if (ch < 0x21 || ch > 0x7E) { 262 ilseq: 263 *nresult = (size_t)-1; 264 return EILSEQ; 265 } 266 wc |= (wchar_t)ch; 267 psenc->chlen = 0; 268 break; 269 default: 270 return EINVAL; 271 } 272 break; 273 default: 274 return EINVAL; 275 } 276 if (pwc != NULL) 277 *pwc = wc; 278 279 *nresult = (size_t)(wc == 0 ? 0 : len); 280 *s = s0; 281 282 return 0; 283 } 284 285 static int 286 /*ARGSUSED*/ 287 _citrus_ZW_wcrtomb_priv(_ZWEncodingInfo * __restrict ei, 288 char *__restrict s, size_t n, wchar_t wc, 289 _ZWState * __restrict psenc, size_t * __restrict nresult) 290 { 291 int ch; 292 293 /* ei may be null */ 294 _DIAGASSERT(s != NULL); 295 _DIAGASSERT(psenc != NULL); 296 _DIAGASSERT(nresult != NULL); 297 298 if (psenc->chlen != 0) 299 return EINVAL; 300 if ((uint32_t)wc <= 0x7F) { 301 ch = (unsigned char)wc; 302 switch (psenc->charset) { 303 case NONE: 304 if (ch == '\0' || ch == '\n') { 305 psenc->ch[psenc->chlen++] = ch; 306 } else { 307 if (n < 4) 308 return E2BIG; 309 n -= 4; 310 psenc->ch[psenc->chlen++] = 'z'; 311 psenc->ch[psenc->chlen++] = 'W'; 312 psenc->ch[psenc->chlen++] = ' '; 313 psenc->ch[psenc->chlen++] = ch; 314 psenc->charset = GB2312; 315 } 316 break; 317 case GB2312: 318 if (n < 2) 319 return E2BIG; 320 n -= 2; 321 if (ch == '\0') { 322 psenc->ch[psenc->chlen++] = '\n'; 323 psenc->ch[psenc->chlen++] = '\0'; 324 psenc->charset = NONE; 325 } else if (ch == '\n') { 326 psenc->ch[psenc->chlen++] = '#'; 327 psenc->ch[psenc->chlen++] = '\n'; 328 psenc->charset = NONE; 329 } else { 330 psenc->ch[psenc->chlen++] = ' '; 331 psenc->ch[psenc->chlen++] = ch; 332 } 333 break; 334 default: 335 return EINVAL; 336 } 337 } else if ((uint32_t)wc <= 0x7E7E) { 338 switch (psenc->charset) { 339 case NONE: 340 if (n < 2) 341 return E2BIG; 342 n -= 2; 343 psenc->ch[psenc->chlen++] = 'z'; 344 psenc->ch[psenc->chlen++] = 'W'; 345 psenc->charset = GB2312; 346 /* FALLTHROUGH*/ 347 case GB2312: 348 if (n < 2) 349 return E2BIG; 350 n -= 2; 351 ch = (wc >> 8) & 0xFF; 352 if (ch < 0x21 || ch > 0x7E) 353 goto ilseq; 354 psenc->ch[psenc->chlen++] = ch; 355 ch = wc & 0xFF; 356 if (ch < 0x21 || ch > 0x7E) 357 goto ilseq; 358 psenc->ch[psenc->chlen++] = ch; 359 break; 360 default: 361 return EINVAL; 362 } 363 } else { 364 ilseq: 365 *nresult = (size_t)-1; 366 return EILSEQ; 367 } 368 memcpy(s, psenc->ch, psenc->chlen); 369 *nresult = psenc->chlen; 370 psenc->chlen = 0; 371 372 return 0; 373 } 374 375 static int 376 /*ARGSUSED*/ 377 _citrus_ZW_put_state_reset(_ZWEncodingInfo * __restrict ei, 378 char * __restrict s, size_t n, 379 _ZWState * __restrict psenc, size_t * __restrict nresult) 380 { 381 /* ei may be unused */ 382 _DIAGASSERT(s != NULL); 383 _DIAGASSERT(psenc != NULL); 384 _DIAGASSERT(nresult != NULL); 385 386 if (psenc->chlen != 0) 387 return EINVAL; 388 switch (psenc->charset) { 389 case GB2312: 390 if (n-- < 1) 391 return E2BIG; 392 psenc->ch[psenc->chlen++] = '\n'; 393 psenc->charset = NONE; 394 /*FALLTHROUGH*/ 395 case NONE: 396 *nresult = psenc->chlen; 397 if (psenc->chlen > 0) { 398 memcpy(s, psenc->ch, psenc->chlen); 399 psenc->chlen = 0; 400 } 401 break; 402 default: 403 return EINVAL; 404 } 405 406 return 0; 407 } 408 409 static __inline int 410 /*ARGSUSED*/ 411 _citrus_ZW_stdenc_get_state_desc_generic(_ZWEncodingInfo * __restrict ei, 412 _ZWState * __restrict psenc, int * __restrict rstate) 413 { 414 /* ei may be unused */ 415 _DIAGASSERT(psenc != NULL); 416 _DIAGASSERT(rstate != NULL); 417 418 switch (psenc->charset) { 419 case NONE: 420 if (psenc->chlen != 0) 421 return EINVAL; 422 *rstate = _STDENC_SDGEN_INITIAL; 423 break; 424 case AMBIGIOUS: 425 if (psenc->chlen != 0) 426 return EINVAL; 427 *rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT; 428 break; 429 case ASCII: 430 case GB2312: 431 switch (psenc->chlen) { 432 case 0: 433 *rstate = _STDENC_SDGEN_STABLE; 434 break; 435 case 1: 436 *rstate = (psenc->ch[0] == '#') 437 ? _STDENC_SDGEN_INCOMPLETE_SHIFT 438 : _STDENC_SDGEN_INCOMPLETE_CHAR; 439 break; 440 default: 441 return EINVAL; 442 } 443 break; 444 default: 445 return EINVAL; 446 } 447 return 0; 448 } 449 450 static __inline int 451 /*ARGSUSED*/ 452 _citrus_ZW_stdenc_wctocs(_ZWEncodingInfo * __restrict ei, 453 _csid_t * __restrict csid, _index_t * __restrict idx, wchar_t wc) 454 { 455 /* ei seems to be unused */ 456 _DIAGASSERT(csid != NULL); 457 _DIAGASSERT(idx != NULL); 458 459 *csid = (_csid_t)(wc <= 0x7FU) ? 0 : 1; 460 *idx = (_index_t)wc; 461 462 return 0; 463 } 464 465 static __inline int 466 /*ARGSUSED*/ 467 _citrus_ZW_stdenc_cstowc(_ZWEncodingInfo * __restrict ei, 468 wchar_t * __restrict wc, _csid_t csid, _index_t idx) 469 { 470 /* ei seems to be unused */ 471 _DIAGASSERT(wc != NULL); 472 473 switch (csid) { 474 case 0: case 1: 475 break; 476 default: 477 return EINVAL; 478 } 479 *wc = (wchar_t)idx; 480 481 return 0; 482 } 483 484 static void 485 /*ARGSUSED*/ 486 _citrus_ZW_encoding_module_uninit(_ZWEncodingInfo *ei) 487 { 488 } 489 490 static int 491 /*ARGSUSED*/ 492 _citrus_ZW_encoding_module_init(_ZWEncodingInfo * __restrict ei, 493 const void *__restrict var, size_t lenvar) 494 { 495 return 0; 496 } 497 498 /* ---------------------------------------------------------------------- 499 * public interface for ctype 500 */ 501 502 _CITRUS_CTYPE_DECLS(ZW); 503 _CITRUS_CTYPE_DEF_OPS(ZW); 504 505 #include "citrus_ctype_template.h" 506 507 /* ---------------------------------------------------------------------- 508 * public interface for stdenc 509 */ 510 511 _CITRUS_STDENC_DECLS(ZW); 512 _CITRUS_STDENC_DEF_OPS(ZW); 513 514 #include "citrus_stdenc_template.h" 515