Home | History | Annotate | Line # | Download | only in time
strptime.c revision 1.30
      1 /*	$NetBSD: strptime.c,v 1.30 2008/11/04 20:17:56 christos Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
      8  * Heavily optimised by David Laight
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include <sys/cdefs.h>
     33 #if defined(LIBC_SCCS) && !defined(lint)
     34 __RCSID("$NetBSD: strptime.c,v 1.30 2008/11/04 20:17:56 christos Exp $");
     35 #endif
     36 
     37 #include "namespace.h"
     38 #include <sys/localedef.h>
     39 #include <ctype.h>
     40 #include <locale.h>
     41 #include <string.h>
     42 #include <time.h>
     43 #include <tzfile.h>
     44 #include "private.h"
     45 
     46 #ifdef __weak_alias
     47 __weak_alias(strptime,_strptime)
     48 #endif
     49 
/* Shorthand for a field of the currently selected time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * ALT_E and ALT_O are the flag bits recorded for the "%E" and "%O"
 * modifiers.  LEGAL_ALT(x) makes the enclosing function return NULL when
 * the local variable `alt_format' holds any modifier bit that is not
 * listed in `x'.  It is wrapped in do/while(0) so that "LEGAL_ALT(x);"
 * is exactly one statement and stays safe inside an unbraced if/else.
 */
#define ALT_E			0x01
#define ALT_O			0x02
#define	LEGAL_ALT(x)		do {					\
	if (alt_format & ~(x))						\
		return NULL;						\
} while (/*CONSTCOND*/0)

/* Zone names recognised by "%Z" and reported for "%z" with a `Z' suffix. */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };

/* Bounded decimal conversion; returns NULL if no number in range is found. */
static const unsigned char *conv_num(const unsigned char *buf, int *dest,
	unsigned int llim, unsigned int ulim);
/* Case-insensitive lookup of the input in one or two tables of names. */
static const unsigned char *find_string(const unsigned char *bp, int *tgt,
	const char * const *n1, const char * const *n2, int c);
     66 
     67 
     68 char *
     69 strptime(const char *buf, const char *fmt, struct tm *tm)
     70 {
     71 	unsigned char c;
     72 	const unsigned char *bp;
     73 	int alt_format, i, split_year = 0, neg, offs;
     74 	const char *new_fmt;
     75 
     76 	bp = (const u_char *)buf;
     77 
     78 	while (bp != NULL && (c = *fmt++) != '\0') {
     79 		/* Clear `alternate' modifier prior to new conversion. */
     80 		alt_format = 0;
     81 		i = 0;
     82 
     83 		/* Eat up white-space. */
     84 		if (isspace(c)) {
     85 			while (isspace(*bp))
     86 				bp++;
     87 			continue;
     88 		}
     89 
     90 		if (c != '%')
     91 			goto literal;
     92 
     93 
     94 again:		switch (c = *fmt++) {
     95 		case '%':	/* "%%" is converted to "%". */
     96 literal:
     97 			if (c != *bp++)
     98 				return NULL;
     99 			LEGAL_ALT(0);
    100 			continue;
    101 
    102 		/*
    103 		 * "Alternative" modifiers. Just set the appropriate flag
    104 		 * and start over again.
    105 		 */
    106 		case 'E':	/* "%E?" alternative conversion modifier. */
    107 			LEGAL_ALT(0);
    108 			alt_format |= ALT_E;
    109 			goto again;
    110 
    111 		case 'O':	/* "%O?" alternative conversion modifier. */
    112 			LEGAL_ALT(0);
    113 			alt_format |= ALT_O;
    114 			goto again;
    115 
    116 		/*
    117 		 * "Complex" conversion rules, implemented through recursion.
    118 		 */
    119 		case 'c':	/* Date and time, using the locale's format. */
    120 			new_fmt = _ctloc(d_t_fmt);
    121 			goto recurse;
    122 
    123 		case 'D':	/* The date as "%m/%d/%y". */
    124 			new_fmt = "%m/%d/%y";
    125 			LEGAL_ALT(0);
    126 			goto recurse;
    127 
    128 		case 'F':	/* The date as "%Y-%m-%d". */
    129 			new_fmt = "%Y-%m-%d";
    130 			LEGAL_ALT(0);
    131 			goto recurse;
    132 
    133 		case 'R':	/* The time as "%H:%M". */
    134 			new_fmt = "%H:%M";
    135 			LEGAL_ALT(0);
    136 			goto recurse;
    137 
    138 		case 'r':	/* The time in 12-hour clock representation. */
    139 			new_fmt =_ctloc(t_fmt_ampm);
    140 			LEGAL_ALT(0);
    141 			goto recurse;
    142 
    143 		case 'T':	/* The time as "%H:%M:%S". */
    144 			new_fmt = "%H:%M:%S";
    145 			LEGAL_ALT(0);
    146 			goto recurse;
    147 
    148 		case 'X':	/* The time, using the locale's format. */
    149 			new_fmt =_ctloc(t_fmt);
    150 			goto recurse;
    151 
    152 		case 'x':	/* The date, using the locale's format. */
    153 			new_fmt =_ctloc(d_fmt);
    154 		    recurse:
    155 			bp = (const u_char *)strptime((const char *)bp,
    156 							    new_fmt, tm);
    157 			LEGAL_ALT(ALT_E);
    158 			continue;
    159 
    160 		/*
    161 		 * "Elementary" conversion rules.
    162 		 */
    163 		case 'A':	/* The day of week, using the locale's form. */
    164 		case 'a':
    165 			bp = find_string(bp, &tm->tm_wday, _ctloc(day),
    166 					_ctloc(abday), 7);
    167 			LEGAL_ALT(0);
    168 			continue;
    169 
    170 		case 'B':	/* The month, using the locale's form. */
    171 		case 'b':
    172 		case 'h':
    173 			bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
    174 					_ctloc(abmon), 12);
    175 			LEGAL_ALT(0);
    176 			continue;
    177 
    178 		case 'C':	/* The century number. */
    179 			i = 20;
    180 			bp = conv_num(bp, &i, 0, 99);
    181 
    182 			i = i * 100 - TM_YEAR_BASE;
    183 			if (split_year)
    184 				i += tm->tm_year % 100;
    185 			split_year = 1;
    186 			tm->tm_year = i;
    187 			LEGAL_ALT(ALT_E);
    188 			continue;
    189 
    190 		case 'd':	/* The day of month. */
    191 		case 'e':
    192 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
    193 			LEGAL_ALT(ALT_O);
    194 			continue;
    195 
    196 		case 'k':	/* The hour (24-hour clock representation). */
    197 			LEGAL_ALT(0);
    198 			/* FALLTHROUGH */
    199 		case 'H':
    200 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
    201 			LEGAL_ALT(ALT_O);
    202 			continue;
    203 
    204 		case 'l':	/* The hour (12-hour clock representation). */
    205 			LEGAL_ALT(0);
    206 			/* FALLTHROUGH */
    207 		case 'I':
    208 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
    209 			if (tm->tm_hour == 12)
    210 				tm->tm_hour = 0;
    211 			LEGAL_ALT(ALT_O);
    212 			continue;
    213 
    214 		case 'j':	/* The day of year. */
    215 			i = 1;
    216 			bp = conv_num(bp, &i, 1, 366);
    217 			tm->tm_yday = i - 1;
    218 			LEGAL_ALT(0);
    219 			continue;
    220 
    221 		case 'M':	/* The minute. */
    222 			bp = conv_num(bp, &tm->tm_min, 0, 59);
    223 			LEGAL_ALT(ALT_O);
    224 			continue;
    225 
    226 		case 'm':	/* The month. */
    227 			i = 1;
    228 			bp = conv_num(bp, &i, 1, 12);
    229 			tm->tm_mon = i - 1;
    230 			LEGAL_ALT(ALT_O);
    231 			continue;
    232 
    233 		case 'p':	/* The locale's equivalent of AM/PM. */
    234 			bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
    235 			if (tm->tm_hour > 11)
    236 				return NULL;
    237 			tm->tm_hour += i * 12;
    238 			LEGAL_ALT(0);
    239 			continue;
    240 
    241 		case 'S':	/* The seconds. */
    242 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
    243 			LEGAL_ALT(ALT_O);
    244 			continue;
    245 
    246 		case 'U':	/* The week of year, beginning on sunday. */
    247 		case 'W':	/* The week of year, beginning on monday. */
    248 			/*
    249 			 * XXX This is bogus, as we can not assume any valid
    250 			 * information present in the tm structure at this
    251 			 * point to calculate a real value, so just check the
    252 			 * range for now.
    253 			 */
    254 			 bp = conv_num(bp, &i, 0, 53);
    255 			 LEGAL_ALT(ALT_O);
    256 			 continue;
    257 
    258 		case 'w':	/* The day of week, beginning on sunday. */
    259 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
    260 			LEGAL_ALT(ALT_O);
    261 			continue;
    262 
    263 		case 'u':	/* The day of week, monday = 1. */
    264 			bp = conv_num(bp, &i, 1, 7);
    265 			tm->tm_wday = i % 7;
    266 			LEGAL_ALT(ALT_O);
    267 			continue;
    268 
    269 		case 'g':	/* The year corresponding to the ISO week
    270 				 * number but without the century.
    271 				 */
    272 			bp = conv_num(bp, &i, 0, 99);
    273 			continue;
    274 
    275 		case 'G':	/* The year corresponding to the ISO week
    276 				 * number with century.
    277 				 */
    278 			do
    279 				bp++;
    280 			while (isdigit(*bp));
    281 			continue;
    282 
    283 		case 'V':	/* The ISO 8601:1988 week number as decimal */
    284 			bp = conv_num(bp, &i, 0, 53);
    285 			continue;
    286 
    287 		case 'Y':	/* The year. */
    288 			i = TM_YEAR_BASE;	/* just for data sanity... */
    289 			bp = conv_num(bp, &i, 0, 9999);
    290 			tm->tm_year = i - TM_YEAR_BASE;
    291 			LEGAL_ALT(ALT_E);
    292 			continue;
    293 
    294 		case 'y':	/* The year within 100 years of the epoch. */
    295 			/* LEGAL_ALT(ALT_E | ALT_O); */
    296 			bp = conv_num(bp, &i, 0, 99);
    297 
    298 			if (split_year)
    299 				/* preserve century */
    300 				i += (tm->tm_year / 100) * 100;
    301 			else {
    302 				split_year = 1;
    303 				if (i <= 68)
    304 					i = i + 2000 - TM_YEAR_BASE;
    305 				else
    306 					i = i + 1900 - TM_YEAR_BASE;
    307 			}
    308 			tm->tm_year = i;
    309 			continue;
    310 
    311 		case 'Z':
    312 			tzset();
    313 			if (strncmp((const char *)bp, gmt, 3) == 0) {
    314 				tm->tm_isdst = 0;
    315 #ifdef TM_GMTOFF
    316 				tm->TM_GMTOFF = 0;
    317 #endif
    318 #ifdef TM_ZONE
    319 				tm->TM_ZONE = gmt;
    320 #endif
    321 				bp += 3;
    322 			} else {
    323 				const unsigned char *ep;
    324 
    325 				ep = find_string(bp, &i,
    326 					       	 (const char * const *)tzname,
    327 					       	  NULL, 2);
    328 				if (ep != NULL) {
    329 					tm->tm_isdst = i;
    330 #ifdef TM_GMTOFF
    331 					tm->TM_GMTOFF = -(timezone);
    332 #endif
    333 #ifdef TM_ZONE
    334 					tm->TM_ZONE = tzname[i];
    335 #endif
    336 				}
    337 				bp = ep;
    338 			}
    339 			continue;
    340 
    341 		case 'z':
    342 			/*
    343 			 * We recognize all ISO 8601 formats:
    344 			 * Z	= Zulu time/UTC
    345 			 * [+-]hhmm
    346 			 * [+-]hh:mm
    347 			 * [+-]hh
    348 			 */
    349 			while (isspace(*bp))
    350 				bp++;
    351 
    352 			switch (*bp++) {
    353 			case 'Z':
    354 				tm->tm_isdst = 0;
    355 #ifdef TM_GMTOFF
    356 				tm->TM_GMTOFF = 0;
    357 #endif
    358 #ifdef TM_ZONE
    359 				tm->TM_ZONE = utc;
    360 #endif
    361 				continue;
    362 			case '+':
    363 				neg = 0;
    364 				break;
    365 			case '-':
    366 				neg = 1;
    367 				break;
    368 			default:
    369 				return NULL;
    370 			}
    371 			offs = 0;
    372 			for (i = 0; i < 4; ) {
    373 				if (isdigit(*bp)) {
    374 					offs = offs * 10 + (*bp++ - '0');
    375 					i++;
    376 					continue;
    377 				}
    378 				if (i == 2 && *bp == ':') {
    379 					bp++;
    380 					continue;
    381 				}
    382 				break;
    383 			}
    384 			switch (i) {
    385 			case 2:
    386 				offs *= 100;
    387 				break;
    388 			case 4:
    389 				i = offs % 100;
    390 				if (i >= 60)
    391 					return NULL;
    392 				/* Convert minutes into decimal */
    393 				offs = (offs / 100) * 100 + (i * 50) / 30;
    394 				break;
    395 			default:
    396 				return NULL;
    397 			}
    398 			tm->tm_isdst = 0;	/* XXX */
    399 #ifdef TM_GMTOFF
    400 			tm->TM_GMTOFF = offs;
    401 #endif
    402 #ifdef TM_ZONE
    403 			tm->TM_ZONE = NULL;	/* XXX */
    404 #endif
    405 			continue;
    406 
    407 		/*
    408 		 * Miscellaneous conversions.
    409 		 */
    410 		case 'n':	/* Any kind of white-space. */
    411 		case 't':
    412 			while (isspace(*bp))
    413 				bp++;
    414 			LEGAL_ALT(0);
    415 			continue;
    416 
    417 
    418 		default:	/* Unknown/unsupported conversion. */
    419 			return NULL;
    420 		}
    421 	}
    422 
    423 	return __UNCONST(bp);
    424 }
    425 
    426 
/*
 * conv_num --
 *	Convert the decimal number at the start of `buf'.
 *
 *	No more digits are consumed than `ulim' itself has, and scanning
 *	also stops once the value accumulated so far, multiplied by ten,
 *	would exceed `ulim'.  Any remaining digits are left in the input
 *	for the caller.
 *
 *	On success the value is stored in `*dest' and a pointer to the
 *	first unconsumed character is returned.  NULL is returned, and
 *	`*dest' is left untouched, when `buf' does not start with a digit
 *	or the value lies outside [llim, ulim].
 */
static const unsigned char *
conv_num(const unsigned char *buf, int *dest, unsigned int llim,
	unsigned int ulim)
{
	unsigned int value = 0;
	/* Loses one decimal digit per input digit; zero means "no more". */
	unsigned int digits_left = ulim;
	unsigned char ch = *buf;

	if (ch < '0' || ch > '9')
		return NULL;

	do {
		value = value * 10 + (unsigned int)(ch - '0');
		digits_left /= 10;
		ch = *++buf;
	} while (value * 10 <= ulim && digits_left != 0 &&
	    ch >= '0' && ch <= '9');

	if (value < llim || value > ulim)
		return NULL;

	*dest = (int)value;
	return buf;
}
    453 
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against a table of names.
 *
 *	`n1' and `n2' each point to `c' strings.  Every entry of `n1' is
 *	tried first (the full names), then every entry of `n2' (the
 *	abbreviated names) unless `n2' is NULL.  When `n1' is NULL nothing
 *	is searched at all.
 *
 *	On a match the index of the entry is stored in `*tgt' and a pointer
 *	just past the matched text is returned; otherwise NULL is returned
 *	and `*tgt' is left untouched.
 */
static const unsigned char *
find_string(const unsigned char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	size_t len;
	int i;

	/* check full name - then abbreviated ones */
	while (n1 != NULL) {
		for (i = 0; i < c; i++) {
			len = strlen(n1[i]);
			if (strncasecmp(n1[i], (const char *)bp, len) == 0) {
				*tgt = i;
				return bp + len;
			}
		}
		/* Second pass over the alternate table, then stop. */
		n1 = n2;
		n2 = NULL;
	}

	/* Nothing matched */
	return NULL;
}
    475