Home | History | Annotate | Line # | Download | only in time
strptime.c revision 1.32
      1 /*	$NetBSD: strptime.c,v 1.32 2009/05/01 20:15:05 ginsbach Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
      8  * Heavily optimised by David Laight
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #if defined(LIBC_SCCS) && !defined(lint)
     34 __RCSID("$NetBSD: strptime.c,v 1.32 2009/05/01 20:15:05 ginsbach Exp $");
     35 #endif
     36 
     37 #include "namespace.h"
     38 #include <sys/localedef.h>
     39 #include <ctype.h>
     40 #include <locale.h>
     41 #include <string.h>
     42 #include <time.h>
     43 #include <tzfile.h>
     44 #include "private.h"
     45 
     46 #ifdef __weak_alias
     47 __weak_alias(strptime,_strptime)
     48 #endif
     49 
/* Shorthand for reading member `x' of the current time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * ALT_E and ALT_O are the bits recorded in the caller's local
 * `alt_format' when a "%E" or "%O" modifier has been seen.
 *
 * LEGAL_ALT(x) makes the enclosing function return NULL if any modifier
 * bit outside the mask `x' is set in `alt_format'.  It is wrapped in
 * do { } while (0) so that "LEGAL_ALT(x);" is exactly one statement and
 * can safely be used as the body of an un-braced if/else.
 */
#define ALT_E			0x01
#define ALT_O			0x02
#define	LEGAL_ALT(x)		do { if (alt_format & ~(x)) return NULL; } while (0)
     59 
/*
 * Zone names that strptime() assigns to tm->TM_ZONE for "GMT" (%Z) and
 * for the UT/GMT/Z forms of %z.
 */
static char gmt[] = "GMT";
static char utc[] = "UTC";

/*
 * RFC-822/RFC-2822 North American zone abbreviations, ordered from
 * Eastern to Pacific.  Only the first four entries of each table are
 * searched by strptime().
 */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};
static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};

/* Parse a bounded decimal number; NULL on failure. */
static const u_char *conv_num(const unsigned char *buf, int *dest,
	uint llim, uint ulim);
/* Case-insensitively match one of `c' names from up to two tables. */
static const u_char *find_string(const u_char *bp, int *tgt,
	const char * const *n1, const char * const *n2, int c);
     73 
     74 
     75 char *
     76 strptime(const char *buf, const char *fmt, struct tm *tm)
     77 {
     78 	unsigned char c;
     79 	const unsigned char *bp, *ep;
     80 	int alt_format, i, split_year = 0, neg = 0, offs;
     81 	const char *new_fmt;
     82 
     83 	bp = (const u_char *)buf;
     84 
     85 	while (bp != NULL && (c = *fmt++) != '\0') {
     86 		/* Clear `alternate' modifier prior to new conversion. */
     87 		alt_format = 0;
     88 		i = 0;
     89 
     90 		/* Eat up white-space. */
     91 		if (isspace(c)) {
     92 			while (isspace(*bp))
     93 				bp++;
     94 			continue;
     95 		}
     96 
     97 		if (c != '%')
     98 			goto literal;
     99 
    100 
    101 again:		switch (c = *fmt++) {
    102 		case '%':	/* "%%" is converted to "%". */
    103 literal:
    104 			if (c != *bp++)
    105 				return NULL;
    106 			LEGAL_ALT(0);
    107 			continue;
    108 
    109 		/*
    110 		 * "Alternative" modifiers. Just set the appropriate flag
    111 		 * and start over again.
    112 		 */
    113 		case 'E':	/* "%E?" alternative conversion modifier. */
    114 			LEGAL_ALT(0);
    115 			alt_format |= ALT_E;
    116 			goto again;
    117 
    118 		case 'O':	/* "%O?" alternative conversion modifier. */
    119 			LEGAL_ALT(0);
    120 			alt_format |= ALT_O;
    121 			goto again;
    122 
    123 		/*
    124 		 * "Complex" conversion rules, implemented through recursion.
    125 		 */
    126 		case 'c':	/* Date and time, using the locale's format. */
    127 			new_fmt = _ctloc(d_t_fmt);
    128 			goto recurse;
    129 
    130 		case 'D':	/* The date as "%m/%d/%y". */
    131 			new_fmt = "%m/%d/%y";
    132 			LEGAL_ALT(0);
    133 			goto recurse;
    134 
    135 		case 'F':	/* The date as "%Y-%m-%d". */
    136 			new_fmt = "%Y-%m-%d";
    137 			LEGAL_ALT(0);
    138 			goto recurse;
    139 
    140 		case 'R':	/* The time as "%H:%M". */
    141 			new_fmt = "%H:%M";
    142 			LEGAL_ALT(0);
    143 			goto recurse;
    144 
    145 		case 'r':	/* The time in 12-hour clock representation. */
    146 			new_fmt =_ctloc(t_fmt_ampm);
    147 			LEGAL_ALT(0);
    148 			goto recurse;
    149 
    150 		case 'T':	/* The time as "%H:%M:%S". */
    151 			new_fmt = "%H:%M:%S";
    152 			LEGAL_ALT(0);
    153 			goto recurse;
    154 
    155 		case 'X':	/* The time, using the locale's format. */
    156 			new_fmt =_ctloc(t_fmt);
    157 			goto recurse;
    158 
    159 		case 'x':	/* The date, using the locale's format. */
    160 			new_fmt =_ctloc(d_fmt);
    161 		    recurse:
    162 			bp = (const u_char *)strptime((const char *)bp,
    163 							    new_fmt, tm);
    164 			LEGAL_ALT(ALT_E);
    165 			continue;
    166 
    167 		/*
    168 		 * "Elementary" conversion rules.
    169 		 */
    170 		case 'A':	/* The day of week, using the locale's form. */
    171 		case 'a':
    172 			bp = find_string(bp, &tm->tm_wday, _ctloc(day),
    173 					_ctloc(abday), 7);
    174 			LEGAL_ALT(0);
    175 			continue;
    176 
    177 		case 'B':	/* The month, using the locale's form. */
    178 		case 'b':
    179 		case 'h':
    180 			bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
    181 					_ctloc(abmon), 12);
    182 			LEGAL_ALT(0);
    183 			continue;
    184 
    185 		case 'C':	/* The century number. */
    186 			i = 20;
    187 			bp = conv_num(bp, &i, 0, 99);
    188 
    189 			i = i * 100 - TM_YEAR_BASE;
    190 			if (split_year)
    191 				i += tm->tm_year % 100;
    192 			split_year = 1;
    193 			tm->tm_year = i;
    194 			LEGAL_ALT(ALT_E);
    195 			continue;
    196 
    197 		case 'd':	/* The day of month. */
    198 		case 'e':
    199 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
    200 			LEGAL_ALT(ALT_O);
    201 			continue;
    202 
    203 		case 'k':	/* The hour (24-hour clock representation). */
    204 			LEGAL_ALT(0);
    205 			/* FALLTHROUGH */
    206 		case 'H':
    207 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
    208 			LEGAL_ALT(ALT_O);
    209 			continue;
    210 
    211 		case 'l':	/* The hour (12-hour clock representation). */
    212 			LEGAL_ALT(0);
    213 			/* FALLTHROUGH */
    214 		case 'I':
    215 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
    216 			if (tm->tm_hour == 12)
    217 				tm->tm_hour = 0;
    218 			LEGAL_ALT(ALT_O);
    219 			continue;
    220 
    221 		case 'j':	/* The day of year. */
    222 			i = 1;
    223 			bp = conv_num(bp, &i, 1, 366);
    224 			tm->tm_yday = i - 1;
    225 			LEGAL_ALT(0);
    226 			continue;
    227 
    228 		case 'M':	/* The minute. */
    229 			bp = conv_num(bp, &tm->tm_min, 0, 59);
    230 			LEGAL_ALT(ALT_O);
    231 			continue;
    232 
    233 		case 'm':	/* The month. */
    234 			i = 1;
    235 			bp = conv_num(bp, &i, 1, 12);
    236 			tm->tm_mon = i - 1;
    237 			LEGAL_ALT(ALT_O);
    238 			continue;
    239 
    240 		case 'p':	/* The locale's equivalent of AM/PM. */
    241 			bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
    242 			if (tm->tm_hour > 11)
    243 				return NULL;
    244 			tm->tm_hour += i * 12;
    245 			LEGAL_ALT(0);
    246 			continue;
    247 
    248 		case 'S':	/* The seconds. */
    249 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
    250 			LEGAL_ALT(ALT_O);
    251 			continue;
    252 
    253 		case 'U':	/* The week of year, beginning on sunday. */
    254 		case 'W':	/* The week of year, beginning on monday. */
    255 			/*
    256 			 * XXX This is bogus, as we can not assume any valid
    257 			 * information present in the tm structure at this
    258 			 * point to calculate a real value, so just check the
    259 			 * range for now.
    260 			 */
    261 			 bp = conv_num(bp, &i, 0, 53);
    262 			 LEGAL_ALT(ALT_O);
    263 			 continue;
    264 
    265 		case 'w':	/* The day of week, beginning on sunday. */
    266 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
    267 			LEGAL_ALT(ALT_O);
    268 			continue;
    269 
    270 		case 'u':	/* The day of week, monday = 1. */
    271 			bp = conv_num(bp, &i, 1, 7);
    272 			tm->tm_wday = i % 7;
    273 			LEGAL_ALT(ALT_O);
    274 			continue;
    275 
    276 		case 'g':	/* The year corresponding to the ISO week
    277 				 * number but without the century.
    278 				 */
    279 			bp = conv_num(bp, &i, 0, 99);
    280 			continue;
    281 
    282 		case 'G':	/* The year corresponding to the ISO week
    283 				 * number with century.
    284 				 */
    285 			do
    286 				bp++;
    287 			while (isdigit(*bp));
    288 			continue;
    289 
    290 		case 'V':	/* The ISO 8601:1988 week number as decimal */
    291 			bp = conv_num(bp, &i, 0, 53);
    292 			continue;
    293 
    294 		case 'Y':	/* The year. */
    295 			i = TM_YEAR_BASE;	/* just for data sanity... */
    296 			bp = conv_num(bp, &i, 0, 9999);
    297 			tm->tm_year = i - TM_YEAR_BASE;
    298 			LEGAL_ALT(ALT_E);
    299 			continue;
    300 
    301 		case 'y':	/* The year within 100 years of the epoch. */
    302 			/* LEGAL_ALT(ALT_E | ALT_O); */
    303 			bp = conv_num(bp, &i, 0, 99);
    304 
    305 			if (split_year)
    306 				/* preserve century */
    307 				i += (tm->tm_year / 100) * 100;
    308 			else {
    309 				split_year = 1;
    310 				if (i <= 68)
    311 					i = i + 2000 - TM_YEAR_BASE;
    312 				else
    313 					i = i + 1900 - TM_YEAR_BASE;
    314 			}
    315 			tm->tm_year = i;
    316 			continue;
    317 
    318 		case 'Z':
    319 			tzset();
    320 			if (strncmp((const char *)bp, gmt, 3) == 0) {
    321 				tm->tm_isdst = 0;
    322 #ifdef TM_GMTOFF
    323 				tm->TM_GMTOFF = 0;
    324 #endif
    325 #ifdef TM_ZONE
    326 				tm->TM_ZONE = gmt;
    327 #endif
    328 				bp += 3;
    329 			} else {
    330 				ep = find_string(bp, &i,
    331 					       	 (const char * const *)tzname,
    332 					       	  NULL, 2);
    333 				if (ep != NULL) {
    334 					tm->tm_isdst = i;
    335 #ifdef TM_GMTOFF
    336 					tm->TM_GMTOFF = -(timezone);
    337 #endif
    338 #ifdef TM_ZONE
    339 					tm->TM_ZONE = tzname[i];
    340 #endif
    341 				}
    342 				bp = ep;
    343 			}
    344 			continue;
    345 
    346 		case 'z':
    347 			/*
    348 			 * We recognize all ISO 8601 formats:
    349 			 * Z	= Zulu time/UTC
    350 			 * [+-]hhmm
    351 			 * [+-]hh:mm
    352 			 * [+-]hh
    353 			 * We recognize all RFC-822/RFC-2822 formats:
    354 			 * UT|GMT
    355 			 *          North American : UTC offsets
    356 			 * E[DS]T = Eastern : -4 | -5
    357 			 * C[DS]T = Central : -5 | -6
    358 			 * M[DS]T = Mountain: -6 | -7
    359 			 * P[DS]T = Pacific : -7 | -8
    360 			 *          Military
    361 			 * [A-IL-M] = -1 ... -9 (J not used)
    362 			 * [N-Y]  = +1 ... +12
    363 			 */
    364 			while (isspace(*bp))
    365 				bp++;
    366 
    367 			switch (*bp++) {
    368 			case 'G':
    369 				if (*bp++ != 'M')
    370 					return NULL;
    371 				/*FALLTHROUGH*/
    372 			case 'U':
    373 				if (*bp++ != 'T')
    374 					return NULL;
    375 				/*FALLTHROUGH*/
    376 			case 'Z':
    377 				tm->tm_isdst = 0;
    378 #ifdef TM_GMTOFF
    379 				tm->TM_GMTOFF = 0;
    380 #endif
    381 #ifdef TM_ZONE
    382 				tm->TM_ZONE = utc;
    383 #endif
    384 				continue;
    385 			case '+':
    386 				neg = 0;
    387 				break;
    388 			case '-':
    389 				neg = 1;
    390 				break;
    391 			default:
    392 				--bp;
    393 				ep = find_string(bp, &i, nast, NULL, 4);
    394 				if (ep != NULL) {
    395 #ifdef TM_GMTOFF
    396 					tm->TM_GMTOFF = -5 - i;
    397 #endif
    398 #ifdef TM_ZONE
    399 					tm->TM_ZONE = __UNCONST(nast[i]);
    400 #endif
    401 					bp = ep;
    402 					continue;
    403 				}
    404 				ep = find_string(bp, &i, nadt, NULL, 4);
    405 				if (ep != NULL) {
    406 					tm->tm_isdst = 1;
    407 #ifdef TM_GMTOFF
    408 					tm->TM_GMTOFF = -4 - i;
    409 #endif
    410 #ifdef TM_ZONE
    411 					tm->TM_ZONE = __UNCONST(nadt[i]);
    412 #endif
    413 					bp = ep;
    414 					continue;
    415 				}
    416 
    417 				if ((*bp >= 'A' && *bp <= 'I') ||
    418 				    (*bp >= 'L' && *bp <= 'Y')) {
    419 #ifdef TM_GMTOFF
    420 					/* Argh! No 'J'! */
    421 					if (*bp >= 'A' && *bp <= 'I')
    422 						tm->TM_GMTOFF =
    423 						    ('A' - 1) - (int)*bp;
    424 					else if (*bp >= 'L' && *bp <= 'M')
    425 						tm->TM_GMTOFF = 'A' - (int)*bp;
    426 					else if (*bp >= 'N' && *bp <= 'Y')
    427 						tm->TM_GMTOFF = (int)*bp - 'M';
    428 #endif
    429 #ifdef TM_ZONE
    430 					tm->TM_ZONE = NULL; /* XXX */
    431 #endif
    432 					bp++;
    433 					continue;
    434 				}
    435 				return NULL;
    436 			}
    437 			offs = 0;
    438 			for (i = 0; i < 4; ) {
    439 				if (isdigit(*bp)) {
    440 					offs = offs * 10 + (*bp++ - '0');
    441 					i++;
    442 					continue;
    443 				}
    444 				if (i == 2 && *bp == ':') {
    445 					bp++;
    446 					continue;
    447 				}
    448 				break;
    449 			}
    450 			switch (i) {
    451 			case 2:
    452 				offs *= 100;
    453 				break;
    454 			case 4:
    455 				i = offs % 100;
    456 				if (i >= 60)
    457 					return NULL;
    458 				/* Convert minutes into decimal */
    459 				offs = (offs / 100) * 100 + (i * 50) / 30;
    460 				break;
    461 			default:
    462 				return NULL;
    463 			}
    464 			if (neg)
    465 				offs = -offs;
    466 			tm->tm_isdst = 0;	/* XXX */
    467 #ifdef TM_GMTOFF
    468 			tm->TM_GMTOFF = offs;
    469 #endif
    470 #ifdef TM_ZONE
    471 			tm->TM_ZONE = NULL;	/* XXX */
    472 #endif
    473 			continue;
    474 
    475 		/*
    476 		 * Miscellaneous conversions.
    477 		 */
    478 		case 'n':	/* Any kind of white-space. */
    479 		case 't':
    480 			while (isspace(*bp))
    481 				bp++;
    482 			LEGAL_ALT(0);
    483 			continue;
    484 
    485 
    486 		default:	/* Unknown/unsupported conversion. */
    487 			return NULL;
    488 		}
    489 	}
    490 
    491 	return __UNCONST(bp);
    492 }
    493 
    494 
/*
 * conv_num --
 *	Parse an unsigned decimal number at `buf' and store it in `*dest'.
 *
 *	`ulim' bounds both the value and the length: at most as many
 *	digits as `ulim' has are consumed, and scanning stops early once
 *	appending another digit would necessarily exceed `ulim'.
 *
 *	Returns a pointer to the first character after the number, or
 *	NULL (leaving `*dest' untouched) if `buf' does not start with a
 *	digit or the value lies outside [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	uint value = 0;
	uint digits_left = ulim;	/* reaches 0 after ulim's digit count */

	if (*buf < '0' || *buf > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (uint)(*buf - '0');
		buf++;
		digits_left /= 10;

		/* Another digit could not fit below ulim, or budget spent. */
		if (value * 10 > ulim || digits_left == 0)
			break;
		if (*buf < '0' || *buf > '9')
			break;
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = (int)value;
	return buf;
}
    521 
/*
 * find_string --
 *	Case-insensitively match the start of `bp' against the `c' names
 *	in table `n1' and then, if nothing matched and `n2' is not NULL,
 *	against the `c' names in table `n2'.
 *
 *	On the first match the index of the name within its table is
 *	stored in `*tgt' and a pointer just past the matched text is
 *	returned.  Returns NULL, leaving `*tgt' untouched, when no name
 *	matches.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	const char * const *names;
	unsigned int width;
	int pass, idx;

	/* check full name - then abbreviated ones */
	tables[0] = n1;
	tables[1] = n2;

	for (pass = 0; pass < 2; pass++) {
		names = tables[pass];
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			width = strlen(names[idx]);
			if (strncasecmp(names[idx], (const char *)bp,
			    width) != 0)
				continue;
			*tgt = idx;
			return bp + width;
		}
	}

	/* Nothing matched */
	return NULL;
}
    543