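/*
 * UTF-8 decoder and encoder (multibyte sequence <-> UCS-4 value).
 */

/*
 * Specification: RFC 2279 (sequences of up to six bytes, values up to
 * 0x7fffffff).  RFC 2279 has since been obsoleted by RFC 3629, which
 * limits UTF-8 to four bytes and U+10FFFF.
 */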
81ab64890Smrgstatic int
91ab64890Smrgutf8_mbtowc (conv_t conv, ucs4_t *pwc, const unsigned char *s, int n)
101ab64890Smrg{
111ab64890Smrg  unsigned char c = s[0];
121ab64890Smrg
131ab64890Smrg  if (c < 0x80) {
141ab64890Smrg    *pwc = c;
151ab64890Smrg    return 1;
161ab64890Smrg  } else if (c < 0xc2) {
171ab64890Smrg    return RET_ILSEQ;
181ab64890Smrg  } else if (c < 0xe0) {
191ab64890Smrg    if (n < 2)
201ab64890Smrg      return RET_TOOFEW(0);
211ab64890Smrg    if (!((s[1] ^ 0x80) < 0x40))
221ab64890Smrg      return RET_ILSEQ;
231ab64890Smrg    *pwc = ((ucs4_t) (c & 0x1f) << 6)
241ab64890Smrg           | (ucs4_t) (s[1] ^ 0x80);
251ab64890Smrg    return 2;
261ab64890Smrg  } else if (c < 0xf0) {
271ab64890Smrg    if (n < 3)
281ab64890Smrg      return RET_TOOFEW(0);
291ab64890Smrg    if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
301ab64890Smrg          && (c >= 0xe1 || s[1] >= 0xa0)))
311ab64890Smrg      return RET_ILSEQ;
321ab64890Smrg    *pwc = ((ucs4_t) (c & 0x0f) << 12)
331ab64890Smrg           | ((ucs4_t) (s[1] ^ 0x80) << 6)
341ab64890Smrg           | (ucs4_t) (s[2] ^ 0x80);
351ab64890Smrg    return 3;
361ab64890Smrg  } else if (c < 0xf8) {
371ab64890Smrg    if (n < 4)
381ab64890Smrg      return RET_TOOFEW(0);
391ab64890Smrg    if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
401ab64890Smrg          && (s[3] ^ 0x80) < 0x40
411ab64890Smrg          && (c >= 0xf1 || s[1] >= 0x90)))
421ab64890Smrg      return RET_ILSEQ;
431ab64890Smrg    *pwc = ((ucs4_t) (c & 0x07) << 18)
441ab64890Smrg           | ((ucs4_t) (s[1] ^ 0x80) << 12)
451ab64890Smrg           | ((ucs4_t) (s[2] ^ 0x80) << 6)
461ab64890Smrg           | (ucs4_t) (s[3] ^ 0x80);
471ab64890Smrg    return 4;
481ab64890Smrg  } else if (c < 0xfc) {
491ab64890Smrg    if (n < 5)
501ab64890Smrg      return RET_TOOFEW(0);
511ab64890Smrg    if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
521ab64890Smrg          && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
531ab64890Smrg          && (c >= 0xf9 || s[1] >= 0x88)))
541ab64890Smrg      return RET_ILSEQ;
551ab64890Smrg    *pwc = ((ucs4_t) (c & 0x03) << 24)
561ab64890Smrg           | ((ucs4_t) (s[1] ^ 0x80) << 18)
571ab64890Smrg           | ((ucs4_t) (s[2] ^ 0x80) << 12)
581ab64890Smrg           | ((ucs4_t) (s[3] ^ 0x80) << 6)
591ab64890Smrg           | (ucs4_t) (s[4] ^ 0x80);
601ab64890Smrg    return 5;
611ab64890Smrg  } else if (c < 0xfe) {
621ab64890Smrg    if (n < 6)
631ab64890Smrg      return RET_TOOFEW(0);
641ab64890Smrg    if (!((s[1] ^ 0x80) < 0x40 && (s[2] ^ 0x80) < 0x40
651ab64890Smrg          && (s[3] ^ 0x80) < 0x40 && (s[4] ^ 0x80) < 0x40
661ab64890Smrg          && (s[5] ^ 0x80) < 0x40
671ab64890Smrg          && (c >= 0xfd || s[1] >= 0x84)))
681ab64890Smrg      return RET_ILSEQ;
691ab64890Smrg    *pwc = ((ucs4_t) (c & 0x01) << 30)
701ab64890Smrg           | ((ucs4_t) (s[1] ^ 0x80) << 24)
711ab64890Smrg           | ((ucs4_t) (s[2] ^ 0x80) << 18)
721ab64890Smrg           | ((ucs4_t) (s[3] ^ 0x80) << 12)
731ab64890Smrg           | ((ucs4_t) (s[4] ^ 0x80) << 6)
741ab64890Smrg           | (ucs4_t) (s[5] ^ 0x80);
751ab64890Smrg    return 6;
761ab64890Smrg  } else
771ab64890Smrg    return RET_ILSEQ;
781ab64890Smrg}
791ab64890Smrg
801ab64890Smrgstatic int
811ab64890Smrgutf8_wctomb (conv_t conv, unsigned char *r, ucs4_t wc, int n) /* n == 0 is acceptable */
821ab64890Smrg{
831ab64890Smrg  int count;
841ab64890Smrg  if (wc < 0x80)
851ab64890Smrg    count = 1;
861ab64890Smrg  else if (wc < 0x800)
871ab64890Smrg    count = 2;
881ab64890Smrg  else if (wc < 0x10000)
891ab64890Smrg    count = 3;
901ab64890Smrg  else if (wc < 0x200000)
911ab64890Smrg    count = 4;
921ab64890Smrg  else if (wc < 0x4000000)
931ab64890Smrg    count = 5;
941ab64890Smrg  else if (wc <= 0x7fffffff)
951ab64890Smrg    count = 6;
961ab64890Smrg  else
971ab64890Smrg    return RET_ILSEQ;
981ab64890Smrg  if (n < count)
991ab64890Smrg    return RET_TOOSMALL;
1001ab64890Smrg  switch (count) { /* note: code falls through cases! */
1019c019ec5Smaya    case 6: r[5] = (unsigned char) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x4000000;
1029c019ec5Smaya    case 5: r[4] = (unsigned char) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x200000;
1039c019ec5Smaya    case 4: r[3] = (unsigned char) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x10000;
1049c019ec5Smaya    case 3: r[2] = (unsigned char) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0x800;
1059c019ec5Smaya    case 2: r[1] = (unsigned char) (0x80 | (wc & 0x3f)); wc = wc >> 6; wc |= 0xc0;
1069c019ec5Smaya    case 1: r[0] = (unsigned char) wc;
1071ab64890Smrg  }
1081ab64890Smrg  return count;
1091ab64890Smrg}
110