Home | History | Annotate | Line # | Download | only in time
strptime.c revision 1.33
      1 /*	$NetBSD: strptime.c,v 1.33 2009/05/24 02:25:43 ginsbach Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
      8  * Heavily optimised by David Laight
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #if defined(LIBC_SCCS) && !defined(lint)
     34 __RCSID("$NetBSD: strptime.c,v 1.33 2009/05/24 02:25:43 ginsbach Exp $");
     35 #endif
     36 
     37 #include "namespace.h"
     38 #include <sys/localedef.h>
     39 #include <ctype.h>
     40 #include <locale.h>
     41 #include <string.h>
     42 #include <time.h>
     43 #include <tzfile.h>
     44 #include "private.h"
     45 
#ifdef __weak_alias
__weak_alias(strptime,_strptime)
#endif

/* Shorthand for accessing a member of the current time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * LEGAL_ALT(x) makes the enclosing function return NULL when a modifier
 * flag other than those in `x' is set in the local variable `alt_format'.
 * It is wrapped in do/while(0) so that "LEGAL_ALT(x);" is exactly one
 * statement and stays correct under an unbraced if/else.
 */
#define ALT_E			0x01
#define ALT_O			0x02
#define	LEGAL_ALT(x)							\
	do {								\
		if (alt_format & ~(x))					\
			return NULL;					\
	} while (/*CONSTCOND*/0)

/* Zone names handed back through tm->TM_ZONE by %Z and %z. */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };
/*
 * RFC-822/RFC-2822 North American zone names, standard and daylight.
 * Only the first four entries of each table are searched by %z.
 */
static const char * const nast[5] = {
       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
};
static const char * const nadt[5] = {
       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
};

/*
 * Helper prototypes, spelled with standard C types (the same types the
 * u_char and uint typedefs name, so the definitions remain compatible).
 */
static const unsigned char *conv_num(const unsigned char *, int *,
	unsigned int, unsigned int);
static const unsigned char *find_string(const unsigned char *, int *,
	const char * const *, const char * const *, int);
     73 
     74 
     75 char *
     76 strptime(const char *buf, const char *fmt, struct tm *tm)
     77 {
     78 	unsigned char c;
     79 	const unsigned char *bp, *ep;
     80 	int alt_format, i, split_year = 0, neg = 0, offs;
     81 	const char *new_fmt;
     82 
     83 	bp = (const u_char *)buf;
     84 
     85 	while (bp != NULL && (c = *fmt++) != '\0') {
     86 		/* Clear `alternate' modifier prior to new conversion. */
     87 		alt_format = 0;
     88 		i = 0;
     89 
     90 		/* Eat up white-space. */
     91 		if (isspace(c)) {
     92 			while (isspace(*bp))
     93 				bp++;
     94 			continue;
     95 		}
     96 
     97 		if (c != '%')
     98 			goto literal;
     99 
    100 
    101 again:		switch (c = *fmt++) {
    102 		case '%':	/* "%%" is converted to "%". */
    103 literal:
    104 			if (c != *bp++)
    105 				return NULL;
    106 			LEGAL_ALT(0);
    107 			continue;
    108 
    109 		/*
    110 		 * "Alternative" modifiers. Just set the appropriate flag
    111 		 * and start over again.
    112 		 */
    113 		case 'E':	/* "%E?" alternative conversion modifier. */
    114 			LEGAL_ALT(0);
    115 			alt_format |= ALT_E;
    116 			goto again;
    117 
    118 		case 'O':	/* "%O?" alternative conversion modifier. */
    119 			LEGAL_ALT(0);
    120 			alt_format |= ALT_O;
    121 			goto again;
    122 
    123 		/*
    124 		 * "Complex" conversion rules, implemented through recursion.
    125 		 */
    126 		case 'c':	/* Date and time, using the locale's format. */
    127 			new_fmt = _ctloc(d_t_fmt);
    128 			goto recurse;
    129 
    130 		case 'D':	/* The date as "%m/%d/%y". */
    131 			new_fmt = "%m/%d/%y";
    132 			LEGAL_ALT(0);
    133 			goto recurse;
    134 
    135 		case 'F':	/* The date as "%Y-%m-%d". */
    136 			new_fmt = "%Y-%m-%d";
    137 			LEGAL_ALT(0);
    138 			goto recurse;
    139 
    140 		case 'R':	/* The time as "%H:%M". */
    141 			new_fmt = "%H:%M";
    142 			LEGAL_ALT(0);
    143 			goto recurse;
    144 
    145 		case 'r':	/* The time in 12-hour clock representation. */
    146 			new_fmt =_ctloc(t_fmt_ampm);
    147 			LEGAL_ALT(0);
    148 			goto recurse;
    149 
    150 		case 'T':	/* The time as "%H:%M:%S". */
    151 			new_fmt = "%H:%M:%S";
    152 			LEGAL_ALT(0);
    153 			goto recurse;
    154 
    155 		case 'X':	/* The time, using the locale's format. */
    156 			new_fmt =_ctloc(t_fmt);
    157 			goto recurse;
    158 
    159 		case 'x':	/* The date, using the locale's format. */
    160 			new_fmt =_ctloc(d_fmt);
    161 		    recurse:
    162 			bp = (const u_char *)strptime((const char *)bp,
    163 							    new_fmt, tm);
    164 			LEGAL_ALT(ALT_E);
    165 			continue;
    166 
    167 		/*
    168 		 * "Elementary" conversion rules.
    169 		 */
    170 		case 'A':	/* The day of week, using the locale's form. */
    171 		case 'a':
    172 			bp = find_string(bp, &tm->tm_wday, _ctloc(day),
    173 					_ctloc(abday), 7);
    174 			LEGAL_ALT(0);
    175 			continue;
    176 
    177 		case 'B':	/* The month, using the locale's form. */
    178 		case 'b':
    179 		case 'h':
    180 			bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
    181 					_ctloc(abmon), 12);
    182 			LEGAL_ALT(0);
    183 			continue;
    184 
    185 		case 'C':	/* The century number. */
    186 			i = 20;
    187 			bp = conv_num(bp, &i, 0, 99);
    188 
    189 			i = i * 100 - TM_YEAR_BASE;
    190 			if (split_year)
    191 				i += tm->tm_year % 100;
    192 			split_year = 1;
    193 			tm->tm_year = i;
    194 			LEGAL_ALT(ALT_E);
    195 			continue;
    196 
    197 		case 'd':	/* The day of month. */
    198 		case 'e':
    199 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
    200 			LEGAL_ALT(ALT_O);
    201 			continue;
    202 
    203 		case 'k':	/* The hour (24-hour clock representation). */
    204 			LEGAL_ALT(0);
    205 			/* FALLTHROUGH */
    206 		case 'H':
    207 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
    208 			LEGAL_ALT(ALT_O);
    209 			continue;
    210 
    211 		case 'l':	/* The hour (12-hour clock representation). */
    212 			LEGAL_ALT(0);
    213 			/* FALLTHROUGH */
    214 		case 'I':
    215 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
    216 			if (tm->tm_hour == 12)
    217 				tm->tm_hour = 0;
    218 			LEGAL_ALT(ALT_O);
    219 			continue;
    220 
    221 		case 'j':	/* The day of year. */
    222 			i = 1;
    223 			bp = conv_num(bp, &i, 1, 366);
    224 			tm->tm_yday = i - 1;
    225 			LEGAL_ALT(0);
    226 			continue;
    227 
    228 		case 'M':	/* The minute. */
    229 			bp = conv_num(bp, &tm->tm_min, 0, 59);
    230 			LEGAL_ALT(ALT_O);
    231 			continue;
    232 
    233 		case 'm':	/* The month. */
    234 			i = 1;
    235 			bp = conv_num(bp, &i, 1, 12);
    236 			tm->tm_mon = i - 1;
    237 			LEGAL_ALT(ALT_O);
    238 			continue;
    239 
    240 		case 'p':	/* The locale's equivalent of AM/PM. */
    241 			bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
    242 			if (tm->tm_hour > 11)
    243 				return NULL;
    244 			tm->tm_hour += i * 12;
    245 			LEGAL_ALT(0);
    246 			continue;
    247 
    248 		case 'S':	/* The seconds. */
    249 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
    250 			LEGAL_ALT(ALT_O);
    251 			continue;
    252 
    253 #ifndef TIME_MAX
    254 #define TIME_MAX	INT64_MAX
    255 #endif
    256 		case 's':	/* seconds since the epoch */
    257 			{
    258 				time_t sse = 0;
    259 				uint64_t rulim = TIME_MAX;
    260 
    261 				if (*bp < '0' || *bp > '9') {
    262 					bp = NULL;
    263 					continue;
    264 				}
    265 
    266 				do {
    267 					sse *= 10;
    268 					sse += *bp++ - '0';
    269 					rulim /= 10;
    270 				} while ((sse * 10 <= TIME_MAX) &&
    271 					 rulim && *bp >= '0' && *bp <= '9');
    272 
    273 				if (sse < 0 || (uint64_t)sse > TIME_MAX) {
    274 					bp = NULL;
    275 					continue;
    276 				}
    277 
    278 				if (localtime_r(&sse, tm) == NULL)
    279 					bp = NULL;
    280 			}
    281 			continue;
    282 
    283 		case 'U':	/* The week of year, beginning on sunday. */
    284 		case 'W':	/* The week of year, beginning on monday. */
    285 			/*
    286 			 * XXX This is bogus, as we can not assume any valid
    287 			 * information present in the tm structure at this
    288 			 * point to calculate a real value, so just check the
    289 			 * range for now.
    290 			 */
    291 			 bp = conv_num(bp, &i, 0, 53);
    292 			 LEGAL_ALT(ALT_O);
    293 			 continue;
    294 
    295 		case 'w':	/* The day of week, beginning on sunday. */
    296 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
    297 			LEGAL_ALT(ALT_O);
    298 			continue;
    299 
    300 		case 'u':	/* The day of week, monday = 1. */
    301 			bp = conv_num(bp, &i, 1, 7);
    302 			tm->tm_wday = i % 7;
    303 			LEGAL_ALT(ALT_O);
    304 			continue;
    305 
    306 		case 'g':	/* The year corresponding to the ISO week
    307 				 * number but without the century.
    308 				 */
    309 			bp = conv_num(bp, &i, 0, 99);
    310 			continue;
    311 
    312 		case 'G':	/* The year corresponding to the ISO week
    313 				 * number with century.
    314 				 */
    315 			do
    316 				bp++;
    317 			while (isdigit(*bp));
    318 			continue;
    319 
    320 		case 'V':	/* The ISO 8601:1988 week number as decimal */
    321 			bp = conv_num(bp, &i, 0, 53);
    322 			continue;
    323 
    324 		case 'Y':	/* The year. */
    325 			i = TM_YEAR_BASE;	/* just for data sanity... */
    326 			bp = conv_num(bp, &i, 0, 9999);
    327 			tm->tm_year = i - TM_YEAR_BASE;
    328 			LEGAL_ALT(ALT_E);
    329 			continue;
    330 
    331 		case 'y':	/* The year within 100 years of the epoch. */
    332 			/* LEGAL_ALT(ALT_E | ALT_O); */
    333 			bp = conv_num(bp, &i, 0, 99);
    334 
    335 			if (split_year)
    336 				/* preserve century */
    337 				i += (tm->tm_year / 100) * 100;
    338 			else {
    339 				split_year = 1;
    340 				if (i <= 68)
    341 					i = i + 2000 - TM_YEAR_BASE;
    342 				else
    343 					i = i + 1900 - TM_YEAR_BASE;
    344 			}
    345 			tm->tm_year = i;
    346 			continue;
    347 
    348 		case 'Z':
    349 			tzset();
    350 			if (strncmp((const char *)bp, gmt, 3) == 0) {
    351 				tm->tm_isdst = 0;
    352 #ifdef TM_GMTOFF
    353 				tm->TM_GMTOFF = 0;
    354 #endif
    355 #ifdef TM_ZONE
    356 				tm->TM_ZONE = gmt;
    357 #endif
    358 				bp += 3;
    359 			} else {
    360 				ep = find_string(bp, &i,
    361 					       	 (const char * const *)tzname,
    362 					       	  NULL, 2);
    363 				if (ep != NULL) {
    364 					tm->tm_isdst = i;
    365 #ifdef TM_GMTOFF
    366 					tm->TM_GMTOFF = -(timezone);
    367 #endif
    368 #ifdef TM_ZONE
    369 					tm->TM_ZONE = tzname[i];
    370 #endif
    371 				}
    372 				bp = ep;
    373 			}
    374 			continue;
    375 
    376 		case 'z':
    377 			/*
    378 			 * We recognize all ISO 8601 formats:
    379 			 * Z	= Zulu time/UTC
    380 			 * [+-]hhmm
    381 			 * [+-]hh:mm
    382 			 * [+-]hh
    383 			 * We recognize all RFC-822/RFC-2822 formats:
    384 			 * UT|GMT
    385 			 *          North American : UTC offsets
    386 			 * E[DS]T = Eastern : -4 | -5
    387 			 * C[DS]T = Central : -5 | -6
    388 			 * M[DS]T = Mountain: -6 | -7
    389 			 * P[DS]T = Pacific : -7 | -8
    390 			 *          Military
    391 			 * [A-IL-M] = -1 ... -9 (J not used)
    392 			 * [N-Y]  = +1 ... +12
    393 			 */
    394 			while (isspace(*bp))
    395 				bp++;
    396 
    397 			switch (*bp++) {
    398 			case 'G':
    399 				if (*bp++ != 'M')
    400 					return NULL;
    401 				/*FALLTHROUGH*/
    402 			case 'U':
    403 				if (*bp++ != 'T')
    404 					return NULL;
    405 				/*FALLTHROUGH*/
    406 			case 'Z':
    407 				tm->tm_isdst = 0;
    408 #ifdef TM_GMTOFF
    409 				tm->TM_GMTOFF = 0;
    410 #endif
    411 #ifdef TM_ZONE
    412 				tm->TM_ZONE = utc;
    413 #endif
    414 				continue;
    415 			case '+':
    416 				neg = 0;
    417 				break;
    418 			case '-':
    419 				neg = 1;
    420 				break;
    421 			default:
    422 				--bp;
    423 				ep = find_string(bp, &i, nast, NULL, 4);
    424 				if (ep != NULL) {
    425 #ifdef TM_GMTOFF
    426 					tm->TM_GMTOFF = -5 - i;
    427 #endif
    428 #ifdef TM_ZONE
    429 					tm->TM_ZONE = __UNCONST(nast[i]);
    430 #endif
    431 					bp = ep;
    432 					continue;
    433 				}
    434 				ep = find_string(bp, &i, nadt, NULL, 4);
    435 				if (ep != NULL) {
    436 					tm->tm_isdst = 1;
    437 #ifdef TM_GMTOFF
    438 					tm->TM_GMTOFF = -4 - i;
    439 #endif
    440 #ifdef TM_ZONE
    441 					tm->TM_ZONE = __UNCONST(nadt[i]);
    442 #endif
    443 					bp = ep;
    444 					continue;
    445 				}
    446 
    447 				if ((*bp >= 'A' && *bp <= 'I') ||
    448 				    (*bp >= 'L' && *bp <= 'Y')) {
    449 #ifdef TM_GMTOFF
    450 					/* Argh! No 'J'! */
    451 					if (*bp >= 'A' && *bp <= 'I')
    452 						tm->TM_GMTOFF =
    453 						    ('A' - 1) - (int)*bp;
    454 					else if (*bp >= 'L' && *bp <= 'M')
    455 						tm->TM_GMTOFF = 'A' - (int)*bp;
    456 					else if (*bp >= 'N' && *bp <= 'Y')
    457 						tm->TM_GMTOFF = (int)*bp - 'M';
    458 #endif
    459 #ifdef TM_ZONE
    460 					tm->TM_ZONE = NULL; /* XXX */
    461 #endif
    462 					bp++;
    463 					continue;
    464 				}
    465 				return NULL;
    466 			}
    467 			offs = 0;
    468 			for (i = 0; i < 4; ) {
    469 				if (isdigit(*bp)) {
    470 					offs = offs * 10 + (*bp++ - '0');
    471 					i++;
    472 					continue;
    473 				}
    474 				if (i == 2 && *bp == ':') {
    475 					bp++;
    476 					continue;
    477 				}
    478 				break;
    479 			}
    480 			switch (i) {
    481 			case 2:
    482 				offs *= 100;
    483 				break;
    484 			case 4:
    485 				i = offs % 100;
    486 				if (i >= 60)
    487 					return NULL;
    488 				/* Convert minutes into decimal */
    489 				offs = (offs / 100) * 100 + (i * 50) / 30;
    490 				break;
    491 			default:
    492 				return NULL;
    493 			}
    494 			if (neg)
    495 				offs = -offs;
    496 			tm->tm_isdst = 0;	/* XXX */
    497 #ifdef TM_GMTOFF
    498 			tm->TM_GMTOFF = offs;
    499 #endif
    500 #ifdef TM_ZONE
    501 			tm->TM_ZONE = NULL;	/* XXX */
    502 #endif
    503 			continue;
    504 
    505 		/*
    506 		 * Miscellaneous conversions.
    507 		 */
    508 		case 'n':	/* Any kind of white-space. */
    509 		case 't':
    510 			while (isspace(*bp))
    511 				bp++;
    512 			LEGAL_ALT(0);
    513 			continue;
    514 
    515 
    516 		default:	/* Unknown/unsupported conversion. */
    517 			return NULL;
    518 		}
    519 	}
    520 
    521 	return __UNCONST(bp);
    522 }
    523 
    524 
/*
 * conv_num --
 *	Parse an unsigned decimal number at `buf' and store it in `*dest'.
 *
 *	`ulim' bounds both the value and the number of digits consumed:
 *	at most as many digits as `ulim' itself has are read, and reading
 *	stops early once another digit would necessarily push the value
 *	above `ulim'.  Leading zeros count as digits.
 *
 *	Returns a pointer to the first character after the digits that
 *	were consumed.  Returns NULL, leaving `*dest' untouched, if `buf'
 *	does not start with a digit or the value lies outside
 *	[llim, ulim].
 */
static const unsigned char *
conv_num(const unsigned char *buf, int *dest, unsigned int llim,
	unsigned int ulim)
{
	unsigned int result = 0;
	unsigned char ch;

	/* The limit also determines the number of valid digits. */
	unsigned int rulim = ulim;

	ch = *buf;
	if (ch < '0' || ch > '9')
		return NULL;

	do {
		result *= 10;
		result += ch - '0';
		rulim /= 10;		/* one digit position used up */
		ch = *++buf;
	} while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9');

	if (result < llim || result > ulim)
		return NULL;

	/* result <= ulim here, so the value fits whatever limit was asked. */
	*dest = (int)result;
	return buf;
}
    551 
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against the `c' strings
 *	in table `n1' and then, if `n2' is not NULL, against the `c'
 *	strings in table `n2' (full names first, abbreviations second).
 *
 *	On the first match the index of the matching entry is stored in
 *	`*tgt' and a pointer just past the matched text is returned.
 *	Returns NULL, leaving `*tgt' untouched, if nothing matches.
 */
static const unsigned char *
find_string(const unsigned char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	int i;
	size_t len;	/* strlen() yields size_t; do not narrow it */

	/* check full name - then abbreviated ones */
	for (; n1 != NULL; n1 = n2, n2 = NULL) {
		for (i = 0; i < c; i++, n1++) {
			len = strlen(*n1);
			if (strncasecmp(*n1, (const char *)bp, len) == 0) {
				*tgt = i;
				return bp + len;
			}
		}
	}

	/* Nothing matched */
	return NULL;
}
    573