Home | History | Annotate | Line # | Download | only in time
strptime.c revision 1.39
      1 /*	$NetBSD: strptime.c,v 1.39 2015/04/06 14:38:22 ginsbach Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
      8  * Heavily optimised by David Laight
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #if defined(LIBC_SCCS) && !defined(lint)
     34 __RCSID("$NetBSD: strptime.c,v 1.39 2015/04/06 14:38:22 ginsbach Exp $");
     35 #endif
     36 
     37 #include "namespace.h"
     38 #include <sys/localedef.h>
     39 #include <ctype.h>
     40 #include <locale.h>
     41 #include <string.h>
     42 #include <time.h>
     43 #include <tzfile.h>
     44 #include "private.h"
     45 #include "setlocale_local.h"
     46 
     47 #ifdef __weak_alias
     48 __weak_alias(strptime,_strptime)
     49 __weak_alias(strptime_l, _strptime_l)
     50 #endif
     51 
/*
 * Fetch the LC_TIME part of locale `loc' as a _TimeLocale pointer.
 */
#define _TIME_LOCALE(loc) \
    ((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME]))

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 */
#define ALT_E			0x01	/* "%E" modifier was seen */
#define ALT_O			0x02	/* "%O" modifier was seen */
/*
 * Return NULL from the enclosing function if any modifier bit outside
 * the permitted set `x' is present in the local variable `alt_format'.
 * Wrapped in do/while(0) so that "LEGAL_ALT(x);" is exactly one
 * statement and stays correct inside an unbraced if/else.
 */
#define	LEGAL_ALT(x)	\
	do { if (alt_format & ~(x)) return NULL; } while (/*CONSTCOND*/0)

/* Zone names matched by %Z and %z; non-const because TM_ZONE is assigned them. */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };
/* RFC-822/RFC-2822 */
/* North American standard-time abbreviations; %z searches the first four. */
static const char * const nast[5] = {
       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
};
/* North American daylight-time abbreviations, in the same order as nast[]. */
static const char * const nadt[5] = {
       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
};
     72 
/*
 * conv_num: parse a decimal number at the start of the buffer, store it
 * through the int pointer when it lies within the two given limits, and
 * return a pointer just past the digits consumed (NULL on failure).
 */
static const u_char *conv_num(const unsigned char *, int *, uint, uint);
/*
 * find_string: case-insensitively match the start of the buffer against
 * the first table of names and then the optional second one; on success
 * store the matching index and return a pointer past the match, otherwise
 * return NULL.  The final argument is the number of entries per table.
 */
static const u_char *find_string(const u_char *, int *, const char * const *,
	const char * const *, int);
     76 
     77 char *
     78 strptime(const char *buf, const char *fmt, struct tm *tm)
     79 {
     80 	return strptime_l(buf, fmt, tm, _current_locale());
     81 }
     82 
     83 char *
     84 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc)
     85 {
     86 	unsigned char c;
     87 	const unsigned char *bp, *ep;
     88 	int alt_format, i, split_year = 0, neg = 0, offs;
     89 	const char *new_fmt;
     90 
     91 	bp = (const u_char *)buf;
     92 
     93 	while (bp != NULL && (c = *fmt++) != '\0') {
     94 		/* Clear `alternate' modifier prior to new conversion. */
     95 		alt_format = 0;
     96 		i = 0;
     97 
     98 		/* Eat up white-space. */
     99 		if (isspace(c)) {
    100 			while (isspace(*bp))
    101 				bp++;
    102 			continue;
    103 		}
    104 
    105 		if (c != '%')
    106 			goto literal;
    107 
    108 
    109 again:		switch (c = *fmt++) {
    110 		case '%':	/* "%%" is converted to "%". */
    111 literal:
    112 			if (c != *bp++)
    113 				return NULL;
    114 			LEGAL_ALT(0);
    115 			continue;
    116 
    117 		/*
    118 		 * "Alternative" modifiers. Just set the appropriate flag
    119 		 * and start over again.
    120 		 */
    121 		case 'E':	/* "%E?" alternative conversion modifier. */
    122 			LEGAL_ALT(0);
    123 			alt_format |= ALT_E;
    124 			goto again;
    125 
    126 		case 'O':	/* "%O?" alternative conversion modifier. */
    127 			LEGAL_ALT(0);
    128 			alt_format |= ALT_O;
    129 			goto again;
    130 
    131 		/*
    132 		 * "Complex" conversion rules, implemented through recursion.
    133 		 */
    134 		case 'c':	/* Date and time, using the locale's format. */
    135 			new_fmt = _TIME_LOCALE(loc)->d_t_fmt;
    136 			goto recurse;
    137 
    138 		case 'D':	/* The date as "%m/%d/%y". */
    139 			new_fmt = "%m/%d/%y";
    140 			LEGAL_ALT(0);
    141 			goto recurse;
    142 
    143 		case 'F':	/* The date as "%Y-%m-%d". */
    144 			new_fmt = "%Y-%m-%d";
    145 			LEGAL_ALT(0);
    146 			goto recurse;
    147 
    148 		case 'R':	/* The time as "%H:%M". */
    149 			new_fmt = "%H:%M";
    150 			LEGAL_ALT(0);
    151 			goto recurse;
    152 
    153 		case 'r':	/* The time in 12-hour clock representation. */
    154 			new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm;
    155 			LEGAL_ALT(0);
    156 			goto recurse;
    157 
    158 		case 'T':	/* The time as "%H:%M:%S". */
    159 			new_fmt = "%H:%M:%S";
    160 			LEGAL_ALT(0);
    161 			goto recurse;
    162 
    163 		case 'X':	/* The time, using the locale's format. */
    164 			new_fmt = _TIME_LOCALE(loc)->t_fmt;
    165 			goto recurse;
    166 
    167 		case 'x':	/* The date, using the locale's format. */
    168 			new_fmt = _TIME_LOCALE(loc)->d_fmt;
    169 		    recurse:
    170 			bp = (const u_char *)strptime((const char *)bp,
    171 							    new_fmt, tm);
    172 			LEGAL_ALT(ALT_E);
    173 			continue;
    174 
    175 		/*
    176 		 * "Elementary" conversion rules.
    177 		 */
    178 		case 'A':	/* The day of week, using the locale's form. */
    179 		case 'a':
    180 			bp = find_string(bp, &tm->tm_wday,
    181 			    _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7);
    182 			LEGAL_ALT(0);
    183 			continue;
    184 
    185 		case 'B':	/* The month, using the locale's form. */
    186 		case 'b':
    187 		case 'h':
    188 			bp = find_string(bp, &tm->tm_mon,
    189 			    _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon,
    190 			    12);
    191 			LEGAL_ALT(0);
    192 			continue;
    193 
    194 		case 'C':	/* The century number. */
    195 			i = 20;
    196 			bp = conv_num(bp, &i, 0, 99);
    197 
    198 			i = i * 100 - TM_YEAR_BASE;
    199 			if (split_year)
    200 				i += tm->tm_year % 100;
    201 			split_year = 1;
    202 			tm->tm_year = i;
    203 			LEGAL_ALT(ALT_E);
    204 			continue;
    205 
    206 		case 'd':	/* The day of month. */
    207 		case 'e':
    208 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
    209 			LEGAL_ALT(ALT_O);
    210 			continue;
    211 
    212 		case 'k':	/* The hour (24-hour clock representation). */
    213 			LEGAL_ALT(0);
    214 			/* FALLTHROUGH */
    215 		case 'H':
    216 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
    217 			LEGAL_ALT(ALT_O);
    218 			continue;
    219 
    220 		case 'l':	/* The hour (12-hour clock representation). */
    221 			LEGAL_ALT(0);
    222 			/* FALLTHROUGH */
    223 		case 'I':
    224 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
    225 			if (tm->tm_hour == 12)
    226 				tm->tm_hour = 0;
    227 			LEGAL_ALT(ALT_O);
    228 			continue;
    229 
    230 		case 'j':	/* The day of year. */
    231 			i = 1;
    232 			bp = conv_num(bp, &i, 1, 366);
    233 			tm->tm_yday = i - 1;
    234 			LEGAL_ALT(0);
    235 			continue;
    236 
    237 		case 'M':	/* The minute. */
    238 			bp = conv_num(bp, &tm->tm_min, 0, 59);
    239 			LEGAL_ALT(ALT_O);
    240 			continue;
    241 
    242 		case 'm':	/* The month. */
    243 			i = 1;
    244 			bp = conv_num(bp, &i, 1, 12);
    245 			tm->tm_mon = i - 1;
    246 			LEGAL_ALT(ALT_O);
    247 			continue;
    248 
    249 		case 'p':	/* The locale's equivalent of AM/PM. */
    250 			bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm,
    251 			    NULL, 2);
    252 			if (tm->tm_hour > 11)
    253 				return NULL;
    254 			tm->tm_hour += i * 12;
    255 			LEGAL_ALT(0);
    256 			continue;
    257 
    258 		case 'S':	/* The seconds. */
    259 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
    260 			LEGAL_ALT(ALT_O);
    261 			continue;
    262 
    263 #ifndef TIME_MAX
    264 #define TIME_MAX	INT64_MAX
    265 #endif
    266 		case 's':	/* seconds since the epoch */
    267 			{
    268 				time_t sse = 0;
    269 				uint64_t rulim = TIME_MAX;
    270 
    271 				if (*bp < '0' || *bp > '9') {
    272 					bp = NULL;
    273 					continue;
    274 				}
    275 
    276 				do {
    277 					sse *= 10;
    278 					sse += *bp++ - '0';
    279 					rulim /= 10;
    280 				} while ((sse * 10 <= TIME_MAX) &&
    281 					 rulim && *bp >= '0' && *bp <= '9');
    282 
    283 				if (sse < 0 || (uint64_t)sse > TIME_MAX) {
    284 					bp = NULL;
    285 					continue;
    286 				}
    287 
    288 				if (localtime_r(&sse, tm) == NULL)
    289 					bp = NULL;
    290 			}
    291 			continue;
    292 
    293 		case 'U':	/* The week of year, beginning on sunday. */
    294 		case 'W':	/* The week of year, beginning on monday. */
    295 			/*
    296 			 * XXX This is bogus, as we can not assume any valid
    297 			 * information present in the tm structure at this
    298 			 * point to calculate a real value, so just check the
    299 			 * range for now.
    300 			 */
    301 			 bp = conv_num(bp, &i, 0, 53);
    302 			 LEGAL_ALT(ALT_O);
    303 			 continue;
    304 
    305 		case 'w':	/* The day of week, beginning on sunday. */
    306 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
    307 			LEGAL_ALT(ALT_O);
    308 			continue;
    309 
    310 		case 'u':	/* The day of week, monday = 1. */
    311 			bp = conv_num(bp, &i, 1, 7);
    312 			tm->tm_wday = i % 7;
    313 			LEGAL_ALT(ALT_O);
    314 			continue;
    315 
    316 		case 'g':	/* The year corresponding to the ISO week
    317 				 * number but without the century.
    318 				 */
    319 			bp = conv_num(bp, &i, 0, 99);
    320 			continue;
    321 
    322 		case 'G':	/* The year corresponding to the ISO week
    323 				 * number with century.
    324 				 */
    325 			do
    326 				bp++;
    327 			while (isdigit(*bp));
    328 			continue;
    329 
    330 		case 'V':	/* The ISO 8601:1988 week number as decimal */
    331 			bp = conv_num(bp, &i, 0, 53);
    332 			continue;
    333 
    334 		case 'Y':	/* The year. */
    335 			i = TM_YEAR_BASE;	/* just for data sanity... */
    336 			bp = conv_num(bp, &i, 0, 9999);
    337 			tm->tm_year = i - TM_YEAR_BASE;
    338 			LEGAL_ALT(ALT_E);
    339 			continue;
    340 
    341 		case 'y':	/* The year within 100 years of the epoch. */
    342 			/* LEGAL_ALT(ALT_E | ALT_O); */
    343 			bp = conv_num(bp, &i, 0, 99);
    344 
    345 			if (split_year)
    346 				/* preserve century */
    347 				i += (tm->tm_year / 100) * 100;
    348 			else {
    349 				split_year = 1;
    350 				if (i <= 68)
    351 					i = i + 2000 - TM_YEAR_BASE;
    352 				else
    353 					i = i + 1900 - TM_YEAR_BASE;
    354 			}
    355 			tm->tm_year = i;
    356 			continue;
    357 
    358 		case 'Z':
    359 			tzset();
    360 			if (strncmp((const char *)bp, gmt, 3) == 0 ||
    361 			    strncmp((const char *)bp, utc, 3) == 0) {
    362 				tm->tm_isdst = 0;
    363 #ifdef TM_GMTOFF
    364 				tm->TM_GMTOFF = 0;
    365 #endif
    366 #ifdef TM_ZONE
    367 				tm->TM_ZONE = gmt;
    368 #endif
    369 				bp += 3;
    370 			} else {
    371 				ep = find_string(bp, &i,
    372 					       	 (const char * const *)tzname,
    373 					       	  NULL, 2);
    374 				if (ep != NULL) {
    375 					tm->tm_isdst = i;
    376 #ifdef TM_GMTOFF
    377 					tm->TM_GMTOFF = -(timezone);
    378 #endif
    379 #ifdef TM_ZONE
    380 					tm->TM_ZONE = tzname[i];
    381 #endif
    382 				}
    383 				bp = ep;
    384 			}
    385 			continue;
    386 
    387 		case 'z':
    388 			/*
    389 			 * We recognize all ISO 8601 formats:
    390 			 * Z	= Zulu time/UTC
    391 			 * [+-]hhmm
    392 			 * [+-]hh:mm
    393 			 * [+-]hh
    394 			 * We recognize all RFC-822/RFC-2822 formats:
    395 			 * UT|GMT
    396 			 *          North American : UTC offsets
    397 			 * E[DS]T = Eastern : -4 | -5
    398 			 * C[DS]T = Central : -5 | -6
    399 			 * M[DS]T = Mountain: -6 | -7
    400 			 * P[DS]T = Pacific : -7 | -8
    401 			 *          Military
    402 			 * [A-IL-M] = -1 ... -9 (J not used)
    403 			 * [N-Y]  = +1 ... +12
    404 			 */
    405 			while (isspace(*bp))
    406 				bp++;
    407 
    408 			switch (*bp++) {
    409 			case 'G':
    410 				if (*bp++ != 'M')
    411 					return NULL;
    412 				/*FALLTHROUGH*/
    413 			case 'U':
    414 				if (*bp++ != 'T')
    415 					return NULL;
    416 				/*FALLTHROUGH*/
    417 			case 'Z':
    418 				tm->tm_isdst = 0;
    419 #ifdef TM_GMTOFF
    420 				tm->TM_GMTOFF = 0;
    421 #endif
    422 #ifdef TM_ZONE
    423 				tm->TM_ZONE = utc;
    424 #endif
    425 				continue;
    426 			case '+':
    427 				neg = 0;
    428 				break;
    429 			case '-':
    430 				neg = 1;
    431 				break;
    432 			default:
    433 				--bp;
    434 				ep = find_string(bp, &i, nast, NULL, 4);
    435 				if (ep != NULL) {
    436 #ifdef TM_GMTOFF
    437 					tm->TM_GMTOFF = -5 - i;
    438 #endif
    439 #ifdef TM_ZONE
    440 					tm->TM_ZONE = __UNCONST(nast[i]);
    441 #endif
    442 					bp = ep;
    443 					continue;
    444 				}
    445 				ep = find_string(bp, &i, nadt, NULL, 4);
    446 				if (ep != NULL) {
    447 					tm->tm_isdst = 1;
    448 #ifdef TM_GMTOFF
    449 					tm->TM_GMTOFF = -4 - i;
    450 #endif
    451 #ifdef TM_ZONE
    452 					tm->TM_ZONE = __UNCONST(nadt[i]);
    453 #endif
    454 					bp = ep;
    455 					continue;
    456 				}
    457 
    458 				if ((*bp >= 'A' && *bp <= 'I') ||
    459 				    (*bp >= 'L' && *bp <= 'Y')) {
    460 #ifdef TM_GMTOFF
    461 					/* Argh! No 'J'! */
    462 					if (*bp >= 'A' && *bp <= 'I')
    463 						tm->TM_GMTOFF =
    464 						    ('A' - 1) - (int)*bp;
    465 					else if (*bp >= 'L' && *bp <= 'M')
    466 						tm->TM_GMTOFF = 'A' - (int)*bp;
    467 					else if (*bp >= 'N' && *bp <= 'Y')
    468 						tm->TM_GMTOFF = (int)*bp - 'M';
    469 #endif
    470 #ifdef TM_ZONE
    471 					tm->TM_ZONE = NULL; /* XXX */
    472 #endif
    473 					bp++;
    474 					continue;
    475 				}
    476 				return NULL;
    477 			}
    478 			offs = 0;
    479 			for (i = 0; i < 4; ) {
    480 				if (isdigit(*bp)) {
    481 					offs = offs * 10 + (*bp++ - '0');
    482 					i++;
    483 					continue;
    484 				}
    485 				if (i == 2 && *bp == ':') {
    486 					bp++;
    487 					continue;
    488 				}
    489 				break;
    490 			}
    491 			switch (i) {
    492 			case 2:
    493 				offs *= 100;
    494 				break;
    495 			case 4:
    496 				i = offs % 100;
    497 				if (i >= 60)
    498 					return NULL;
    499 				/* Convert minutes into decimal */
    500 				offs = (offs / 100) * 100 + (i * 50) / 30;
    501 				break;
    502 			default:
    503 				return NULL;
    504 			}
    505 			if (neg)
    506 				offs = -offs;
    507 			tm->tm_isdst = 0;	/* XXX */
    508 #ifdef TM_GMTOFF
    509 			tm->TM_GMTOFF = offs;
    510 #endif
    511 #ifdef TM_ZONE
    512 			tm->TM_ZONE = NULL;	/* XXX */
    513 #endif
    514 			continue;
    515 
    516 		/*
    517 		 * Miscellaneous conversions.
    518 		 */
    519 		case 'n':	/* Any kind of white-space. */
    520 		case 't':
    521 			while (isspace(*bp))
    522 				bp++;
    523 			LEGAL_ALT(0);
    524 			continue;
    525 
    526 
    527 		default:	/* Unknown/unsupported conversion. */
    528 			return NULL;
    529 		}
    530 	}
    531 
    532 	return __UNCONST(bp);
    533 }
    534 
    535 
/*
 * conv_num --
 *	Convert the decimal number at the start of `buf'.
 *
 *	`ulim' bounds both the value and the number of digits read: a
 *	further digit is consumed only while ten times the value so far
 *	does not exceed `ulim' and fewer digits than `ulim' itself has
 *	have been read.
 *
 *	On success the value is stored in `*dest' and a pointer to the
 *	first unconsumed character is returned.  NULL is returned, and
 *	`*dest' is left untouched, when `buf' does not start with a digit
 *	or the value lies outside [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cur = buf;
	uint value = 0;
	uint digits_left = ulim;	/* loses one decimal digit per digit read */

	if (*cur < '0' || *cur > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (uint)(*cur - '0');
		digits_left /= 10;
		cur++;

		/* Stop once another digit could only exceed the limit. */
		if (value * 10 > ulim || digits_left == 0)
			break;
		if (*cur < '0' || *cur > '9')
			break;
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = (int)value;
	return cur;
}
    562 
/*
 * find_string --
 *	Case-insensitively match the start of `bp' against a table of
 *	names.  The `c' entries of `n1' (full names) are tried first, in
 *	order; if none matches and `n2' is not NULL, its `c' entries
 *	(abbreviated names) are tried next.  Nothing is tried when `n1'
 *	is NULL.
 *
 *	On the first match its index is stored in `*tgt' and a pointer
 *	just past the matched text is returned.  When nothing matches,
 *	NULL is returned and `*tgt' is left untouched.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	const char *input = (const char *)bp;
	int pass, idx;

	tables[0] = n1;
	tables[1] = n2;

	for (pass = 0; pass < 2; pass++) {
		const char * const *names = tables[pass];

		/* A missing table ends the search. */
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			size_t n = strlen(names[idx]);

			if (strncasecmp(names[idx], input, n) == 0) {
				*tgt = idx;
				return bp + n;
			}
		}
	}

	/* Nothing matched */
	return NULL;
}
    584