Home | History | Annotate | Line # | Download | only in time
strptime.c revision 1.27
      1 /*	$NetBSD: strptime.c,v 1.27 2008/04/25 20:51:10 ginsbach Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code was contributed to The NetBSD Foundation by Klaus Klein.
      8  * Heavily optimised by David Laight
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  * 3. All advertising materials mentioning features or use of this software
     19  *    must display the following acknowledgement:
     20  *        This product includes software developed by the NetBSD
     21  *        Foundation, Inc. and its contributors.
     22  * 4. Neither the name of The NetBSD Foundation nor the names of its
     23  *    contributors may be used to endorse or promote products derived
     24  *    from this software without specific prior written permission.
     25  *
     26  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     27  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     28  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     29  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     30  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     31  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     32  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     33  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     34  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     35  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     36  * POSSIBILITY OF SUCH DAMAGE.
     37  */
     38 
     39 #include <sys/cdefs.h>
     40 #if defined(LIBC_SCCS) && !defined(lint)
     41 __RCSID("$NetBSD: strptime.c,v 1.27 2008/04/25 20:51:10 ginsbach Exp $");
     42 #endif
     43 
     44 #include "namespace.h"
     45 #include <sys/localedef.h>
     46 #include <ctype.h>
     47 #include <locale.h>
     48 #include <string.h>
     49 #include <time.h>
     50 #include <tzfile.h>
     51 
     52 #ifdef __weak_alias
     53 __weak_alias(strptime,_strptime)
     54 #endif
     55 
     56 #define	_ctloc(x)		(_CurrentTimeLocale->x)
     57 
     58 /*
     59  * We do not implement alternate representations. However, we always
     60  * check whether a given modifier is allowed for a certain conversion.
     61  */
     62 #define ALT_E			0x01
     63 #define ALT_O			0x02
     64 #define	LEGAL_ALT(x)		{ if (alt_format & ~(x)) return NULL; }
     65 
     66 static const char gmt[4] = { "GMT" };
     67 
     68 static const u_char *conv_num(const unsigned char *, int *, uint, uint);
     69 static const u_char *find_string(const u_char *, int *, const char * const *,
     70 	const char * const *, int);
     71 
     72 
     73 char *
     74 strptime(const char *buf, const char *fmt, struct tm *tm)
     75 {
     76 	unsigned char c;
     77 	const unsigned char *bp;
     78 	int alt_format, i, split_year = 0;
     79 	const char *new_fmt;
     80 
     81 	bp = (const u_char *)buf;
     82 
     83 	while (bp != NULL && (c = *fmt++) != '\0') {
     84 		/* Clear `alternate' modifier prior to new conversion. */
     85 		alt_format = 0;
     86 		i = 0;
     87 
     88 		/* Eat up white-space. */
     89 		if (isspace(c)) {
     90 			while (isspace(*bp))
     91 				bp++;
     92 			continue;
     93 		}
     94 
     95 		if (c != '%')
     96 			goto literal;
     97 
     98 
     99 again:		switch (c = *fmt++) {
    100 		case '%':	/* "%%" is converted to "%". */
    101 literal:
    102 			if (c != *bp++)
    103 				return NULL;
    104 			LEGAL_ALT(0);
    105 			continue;
    106 
    107 		/*
    108 		 * "Alternative" modifiers. Just set the appropriate flag
    109 		 * and start over again.
    110 		 */
    111 		case 'E':	/* "%E?" alternative conversion modifier. */
    112 			LEGAL_ALT(0);
    113 			alt_format |= ALT_E;
    114 			goto again;
    115 
    116 		case 'O':	/* "%O?" alternative conversion modifier. */
    117 			LEGAL_ALT(0);
    118 			alt_format |= ALT_O;
    119 			goto again;
    120 
    121 		/*
    122 		 * "Complex" conversion rules, implemented through recursion.
    123 		 */
    124 		case 'c':	/* Date and time, using the locale's format. */
    125 			new_fmt = _ctloc(d_t_fmt);
    126 			goto recurse;
    127 
    128 		case 'D':	/* The date as "%m/%d/%y". */
    129 			new_fmt = "%m/%d/%y";
    130 			LEGAL_ALT(0);
    131 			goto recurse;
    132 
    133 		case 'F':	/* The date as "%Y-%m-%d". */
    134 			new_fmt = "%Y-%m-%d";
    135 			LEGAL_ALT(0);
    136 			goto recurse;
    137 
    138 		case 'R':	/* The time as "%H:%M". */
    139 			new_fmt = "%H:%M";
    140 			LEGAL_ALT(0);
    141 			goto recurse;
    142 
    143 		case 'r':	/* The time in 12-hour clock representation. */
    144 			new_fmt =_ctloc(t_fmt_ampm);
    145 			LEGAL_ALT(0);
    146 			goto recurse;
    147 
    148 		case 'T':	/* The time as "%H:%M:%S". */
    149 			new_fmt = "%H:%M:%S";
    150 			LEGAL_ALT(0);
    151 			goto recurse;
    152 
    153 		case 'X':	/* The time, using the locale's format. */
    154 			new_fmt =_ctloc(t_fmt);
    155 			goto recurse;
    156 
    157 		case 'x':	/* The date, using the locale's format. */
    158 			new_fmt =_ctloc(d_fmt);
    159 		    recurse:
    160 			bp = (const u_char *)strptime((const char *)bp,
    161 							    new_fmt, tm);
    162 			LEGAL_ALT(ALT_E);
    163 			continue;
    164 
    165 		/*
    166 		 * "Elementary" conversion rules.
    167 		 */
    168 		case 'A':	/* The day of week, using the locale's form. */
    169 		case 'a':
    170 			bp = find_string(bp, &tm->tm_wday, _ctloc(day),
    171 					_ctloc(abday), 7);
    172 			LEGAL_ALT(0);
    173 			continue;
    174 
    175 		case 'B':	/* The month, using the locale's form. */
    176 		case 'b':
    177 		case 'h':
    178 			bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
    179 					_ctloc(abmon), 12);
    180 			LEGAL_ALT(0);
    181 			continue;
    182 
    183 		case 'C':	/* The century number. */
    184 			i = 20;
    185 			bp = conv_num(bp, &i, 0, 99);
    186 
    187 			i = i * 100 - TM_YEAR_BASE;
    188 			if (split_year)
    189 				i += tm->tm_year % 100;
    190 			split_year = 1;
    191 			tm->tm_year = i;
    192 			LEGAL_ALT(ALT_E);
    193 			continue;
    194 
    195 		case 'd':	/* The day of month. */
    196 		case 'e':
    197 			bp = conv_num(bp, &tm->tm_mday, 1, 31);
    198 			LEGAL_ALT(ALT_O);
    199 			continue;
    200 
    201 		case 'k':	/* The hour (24-hour clock representation). */
    202 			LEGAL_ALT(0);
    203 			/* FALLTHROUGH */
    204 		case 'H':
    205 			bp = conv_num(bp, &tm->tm_hour, 0, 23);
    206 			LEGAL_ALT(ALT_O);
    207 			continue;
    208 
    209 		case 'l':	/* The hour (12-hour clock representation). */
    210 			LEGAL_ALT(0);
    211 			/* FALLTHROUGH */
    212 		case 'I':
    213 			bp = conv_num(bp, &tm->tm_hour, 1, 12);
    214 			if (tm->tm_hour == 12)
    215 				tm->tm_hour = 0;
    216 			LEGAL_ALT(ALT_O);
    217 			continue;
    218 
    219 		case 'j':	/* The day of year. */
    220 			i = 1;
    221 			bp = conv_num(bp, &i, 1, 366);
    222 			tm->tm_yday = i - 1;
    223 			LEGAL_ALT(0);
    224 			continue;
    225 
    226 		case 'M':	/* The minute. */
    227 			bp = conv_num(bp, &tm->tm_min, 0, 59);
    228 			LEGAL_ALT(ALT_O);
    229 			continue;
    230 
    231 		case 'm':	/* The month. */
    232 			i = 1;
    233 			bp = conv_num(bp, &i, 1, 12);
    234 			tm->tm_mon = i - 1;
    235 			LEGAL_ALT(ALT_O);
    236 			continue;
    237 
    238 		case 'p':	/* The locale's equivalent of AM/PM. */
    239 			bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
    240 			if (tm->tm_hour > 11)
    241 				return NULL;
    242 			tm->tm_hour += i * 12;
    243 			LEGAL_ALT(0);
    244 			continue;
    245 
    246 		case 'S':	/* The seconds. */
    247 			bp = conv_num(bp, &tm->tm_sec, 0, 61);
    248 			LEGAL_ALT(ALT_O);
    249 			continue;
    250 
    251 		case 'U':	/* The week of year, beginning on sunday. */
    252 		case 'W':	/* The week of year, beginning on monday. */
    253 			/*
    254 			 * XXX This is bogus, as we can not assume any valid
    255 			 * information present in the tm structure at this
    256 			 * point to calculate a real value, so just check the
    257 			 * range for now.
    258 			 */
    259 			 bp = conv_num(bp, &i, 0, 53);
    260 			 LEGAL_ALT(ALT_O);
    261 			 continue;
    262 
    263 		case 'w':	/* The day of week, beginning on sunday. */
    264 			bp = conv_num(bp, &tm->tm_wday, 0, 6);
    265 			LEGAL_ALT(ALT_O);
    266 			continue;
    267 
    268 		case 'Y':	/* The year. */
    269 			i = TM_YEAR_BASE;	/* just for data sanity... */
    270 			bp = conv_num(bp, &i, 0, 9999);
    271 			tm->tm_year = i - TM_YEAR_BASE;
    272 			LEGAL_ALT(ALT_E);
    273 			continue;
    274 
    275 		case 'y':	/* The year within 100 years of the epoch. */
    276 			/* LEGAL_ALT(ALT_E | ALT_O); */
    277 			bp = conv_num(bp, &i, 0, 99);
    278 
    279 			if (split_year)
    280 				/* preserve century */
    281 				i += (tm->tm_year / 100) * 100;
    282 			else {
    283 				split_year = 1;
    284 				if (i <= 68)
    285 					i = i + 2000 - TM_YEAR_BASE;
    286 				else
    287 					i = i + 1900 - TM_YEAR_BASE;
    288 			}
    289 			tm->tm_year = i;
    290 			continue;
    291 
    292 		case 'Z':
    293 			tzset();
    294 			if (strncmp((const char *)bp, gmt, 3) == 0) {
    295 				tm->tm_isdst = 0;
    296 #ifdef TM_GMTOFF
    297 				tm->TM_GMTOFF = 0;
    298 #endif
    299 #ifdef TM_ZONE
    300 				tm->TM_ZONE = gmt;
    301 #endif
    302 				bp += 3;
    303 			} else {
    304 				const unsigned char *ep;
    305 
    306 				ep = find_string(bp, &i,
    307 					       	 (const char * const *)tzname,
    308 					       	  NULL, 2);
    309 				if (ep != NULL) {
    310 					tm->tm_isdst = i;
    311 #ifdef TM_GMTOFF
    312 					tm->TM_GMTOFF = -(timezone);
    313 #endif
    314 #ifdef TM_ZONE
    315 					tm->TM_ZONE = tzname[i];
    316 #endif
    317 				}
    318 				bp = ep;
    319 			}
    320 			continue;
    321 
    322 		/*
    323 		 * Miscellaneous conversions.
    324 		 */
    325 		case 'n':	/* Any kind of white-space. */
    326 		case 't':
    327 			while (isspace(*bp))
    328 				bp++;
    329 			LEGAL_ALT(0);
    330 			continue;
    331 
    332 
    333 		default:	/* Unknown/unsupported conversion. */
    334 			return NULL;
    335 		}
    336 	}
    337 
    338 	return __UNCONST(bp);
    339 }
    340 
    341 
/*
 * conv_num --
 *	Convert the decimal number at the start of `buf'.
 *
 *	At most as many digits are consumed as `ulim' has, and conversion
 *	stops early as soon as appending another digit would necessarily
 *	exceed `ulim' (so "45" with ulim 31 yields 4 and leaves "5").
 *
 *	On success the value is stored in `*dest' and a pointer to the
 *	first unconsumed character is returned.  If `buf' does not start
 *	with a digit, or the value is outside [llim, ulim], NULL is
 *	returned and `*dest' is left untouched.  `ulim' is expected to be
 *	representable as an int, since the result is stored in one.
 */
static const unsigned char *
conv_num(const unsigned char *buf, int *dest, unsigned int llim,
	unsigned int ulim)
{
	unsigned int result = 0;
	unsigned char ch;

	/* The limit also determines the number of valid digits. */
	unsigned int rulim = ulim;

	ch = *buf;
	if (ch < '0' || ch > '9')
		return NULL;

	do {
		result *= 10;
		result += ch - '0';
		rulim /= 10;
		ch = *++buf;
		/*
		 * "result <= ulim / 10" is the same test as
		 * "result * 10 <= ulim" but cannot wrap around for large
		 * limits.
		 */
	} while ((result <= ulim / 10) && rulim && ch >= '0' && ch <= '9');

	if (result < llim || result > ulim)
		return NULL;

	*dest = result;
	return buf;
}
    368 
/*
 * find_string --
 *	Compare the start of `bp', ignoring case, with each of the `c'
 *	strings in the table `n1' and then, if none matched and `n2' is
 *	not NULL, with each of the `c' strings in `n2'.
 *
 *	The first entry that is a prefix of `bp' wins: its index within
 *	its table is stored in `*tgt' and a pointer just past the matched
 *	text is returned.  If nothing matches, NULL is returned and `*tgt'
 *	is left untouched.
 */
static const unsigned char *
find_string(const unsigned char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	int i;
	size_t len;	/* strlen() result; size_t avoids narrowing */

	/* check full name - then abbreviated ones */
	for (; n1 != NULL; n1 = n2, n2 = NULL) {
		for (i = 0; i < c; i++, n1++) {
			len = strlen(*n1);
			if (strncasecmp(*n1, (const char *)bp, len) == 0) {
				*tgt = i;
				return bp + len;
			}
		}
	}

	/* Nothing matched */
	return NULL;
}
    390