strptime.c revision 1.32 1 /* $NetBSD: strptime.c,v 1.32 2009/05/01 20:15:05 ginsbach Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 * Heavily optimised by David Laight
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.32 2009/05/01 20:15:05 ginsbach Exp $");
35 #endif
36
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <ctype.h>
40 #include <locale.h>
41 #include <string.h>
42 #include <time.h>
43 #include <tzfile.h>
44 #include "private.h"
45
#ifdef __weak_alias
__weak_alias(strptime,_strptime)
#endif

/* Shorthand for a member of the current time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 */
#define ALT_E			0x01	/* "%E" modifier was seen */
#define ALT_O			0x02	/* "%O" modifier was seen */

/*
 * Fail the enclosing function (return NULL) if any modifier outside the
 * set `x' has been recorded in the local variable `alt_format'.
 *
 * Wrapped in do/while(0) so that "LEGAL_ALT(x);" is exactly one statement
 * and stays correct inside an unbraced if/else.
 */
#define	LEGAL_ALT(x)							\
	do {								\
		if (alt_format & ~(x))					\
			return NULL;					\
	} while (0)

/* Zone names handed back through tm->TM_ZONE by the %Z and %z conversions. */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };

/*
 * RFC-822/RFC-2822 North American zone names, standard and daylight.
 * The index (0..3) runs Eastern, Central, Mountain, Pacific; the %z
 * conversion searches only the first four entries of each table.
 */
static const char * const nast[5] = {
       "EST",    "CST",    "MST",    "PST",    "\0\0\0"
};
static const char * const nadt[5] = {
       "EDT",    "CDT",    "MDT",    "PDT",    "\0\0\0"
};

static const u_char *conv_num(const unsigned char *, int *, uint, uint);
static const u_char *find_string(const u_char *, int *, const char * const *,
	const char * const *, int);
73
74
75 char *
76 strptime(const char *buf, const char *fmt, struct tm *tm)
77 {
78 unsigned char c;
79 const unsigned char *bp, *ep;
80 int alt_format, i, split_year = 0, neg = 0, offs;
81 const char *new_fmt;
82
83 bp = (const u_char *)buf;
84
85 while (bp != NULL && (c = *fmt++) != '\0') {
86 /* Clear `alternate' modifier prior to new conversion. */
87 alt_format = 0;
88 i = 0;
89
90 /* Eat up white-space. */
91 if (isspace(c)) {
92 while (isspace(*bp))
93 bp++;
94 continue;
95 }
96
97 if (c != '%')
98 goto literal;
99
100
101 again: switch (c = *fmt++) {
102 case '%': /* "%%" is converted to "%". */
103 literal:
104 if (c != *bp++)
105 return NULL;
106 LEGAL_ALT(0);
107 continue;
108
109 /*
110 * "Alternative" modifiers. Just set the appropriate flag
111 * and start over again.
112 */
113 case 'E': /* "%E?" alternative conversion modifier. */
114 LEGAL_ALT(0);
115 alt_format |= ALT_E;
116 goto again;
117
118 case 'O': /* "%O?" alternative conversion modifier. */
119 LEGAL_ALT(0);
120 alt_format |= ALT_O;
121 goto again;
122
123 /*
124 * "Complex" conversion rules, implemented through recursion.
125 */
126 case 'c': /* Date and time, using the locale's format. */
127 new_fmt = _ctloc(d_t_fmt);
128 goto recurse;
129
130 case 'D': /* The date as "%m/%d/%y". */
131 new_fmt = "%m/%d/%y";
132 LEGAL_ALT(0);
133 goto recurse;
134
135 case 'F': /* The date as "%Y-%m-%d". */
136 new_fmt = "%Y-%m-%d";
137 LEGAL_ALT(0);
138 goto recurse;
139
140 case 'R': /* The time as "%H:%M". */
141 new_fmt = "%H:%M";
142 LEGAL_ALT(0);
143 goto recurse;
144
145 case 'r': /* The time in 12-hour clock representation. */
146 new_fmt =_ctloc(t_fmt_ampm);
147 LEGAL_ALT(0);
148 goto recurse;
149
150 case 'T': /* The time as "%H:%M:%S". */
151 new_fmt = "%H:%M:%S";
152 LEGAL_ALT(0);
153 goto recurse;
154
155 case 'X': /* The time, using the locale's format. */
156 new_fmt =_ctloc(t_fmt);
157 goto recurse;
158
159 case 'x': /* The date, using the locale's format. */
160 new_fmt =_ctloc(d_fmt);
161 recurse:
162 bp = (const u_char *)strptime((const char *)bp,
163 new_fmt, tm);
164 LEGAL_ALT(ALT_E);
165 continue;
166
167 /*
168 * "Elementary" conversion rules.
169 */
170 case 'A': /* The day of week, using the locale's form. */
171 case 'a':
172 bp = find_string(bp, &tm->tm_wday, _ctloc(day),
173 _ctloc(abday), 7);
174 LEGAL_ALT(0);
175 continue;
176
177 case 'B': /* The month, using the locale's form. */
178 case 'b':
179 case 'h':
180 bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
181 _ctloc(abmon), 12);
182 LEGAL_ALT(0);
183 continue;
184
185 case 'C': /* The century number. */
186 i = 20;
187 bp = conv_num(bp, &i, 0, 99);
188
189 i = i * 100 - TM_YEAR_BASE;
190 if (split_year)
191 i += tm->tm_year % 100;
192 split_year = 1;
193 tm->tm_year = i;
194 LEGAL_ALT(ALT_E);
195 continue;
196
197 case 'd': /* The day of month. */
198 case 'e':
199 bp = conv_num(bp, &tm->tm_mday, 1, 31);
200 LEGAL_ALT(ALT_O);
201 continue;
202
203 case 'k': /* The hour (24-hour clock representation). */
204 LEGAL_ALT(0);
205 /* FALLTHROUGH */
206 case 'H':
207 bp = conv_num(bp, &tm->tm_hour, 0, 23);
208 LEGAL_ALT(ALT_O);
209 continue;
210
211 case 'l': /* The hour (12-hour clock representation). */
212 LEGAL_ALT(0);
213 /* FALLTHROUGH */
214 case 'I':
215 bp = conv_num(bp, &tm->tm_hour, 1, 12);
216 if (tm->tm_hour == 12)
217 tm->tm_hour = 0;
218 LEGAL_ALT(ALT_O);
219 continue;
220
221 case 'j': /* The day of year. */
222 i = 1;
223 bp = conv_num(bp, &i, 1, 366);
224 tm->tm_yday = i - 1;
225 LEGAL_ALT(0);
226 continue;
227
228 case 'M': /* The minute. */
229 bp = conv_num(bp, &tm->tm_min, 0, 59);
230 LEGAL_ALT(ALT_O);
231 continue;
232
233 case 'm': /* The month. */
234 i = 1;
235 bp = conv_num(bp, &i, 1, 12);
236 tm->tm_mon = i - 1;
237 LEGAL_ALT(ALT_O);
238 continue;
239
240 case 'p': /* The locale's equivalent of AM/PM. */
241 bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
242 if (tm->tm_hour > 11)
243 return NULL;
244 tm->tm_hour += i * 12;
245 LEGAL_ALT(0);
246 continue;
247
248 case 'S': /* The seconds. */
249 bp = conv_num(bp, &tm->tm_sec, 0, 61);
250 LEGAL_ALT(ALT_O);
251 continue;
252
253 case 'U': /* The week of year, beginning on sunday. */
254 case 'W': /* The week of year, beginning on monday. */
255 /*
256 * XXX This is bogus, as we can not assume any valid
257 * information present in the tm structure at this
258 * point to calculate a real value, so just check the
259 * range for now.
260 */
261 bp = conv_num(bp, &i, 0, 53);
262 LEGAL_ALT(ALT_O);
263 continue;
264
265 case 'w': /* The day of week, beginning on sunday. */
266 bp = conv_num(bp, &tm->tm_wday, 0, 6);
267 LEGAL_ALT(ALT_O);
268 continue;
269
270 case 'u': /* The day of week, monday = 1. */
271 bp = conv_num(bp, &i, 1, 7);
272 tm->tm_wday = i % 7;
273 LEGAL_ALT(ALT_O);
274 continue;
275
276 case 'g': /* The year corresponding to the ISO week
277 * number but without the century.
278 */
279 bp = conv_num(bp, &i, 0, 99);
280 continue;
281
282 case 'G': /* The year corresponding to the ISO week
283 * number with century.
284 */
285 do
286 bp++;
287 while (isdigit(*bp));
288 continue;
289
290 case 'V': /* The ISO 8601:1988 week number as decimal */
291 bp = conv_num(bp, &i, 0, 53);
292 continue;
293
294 case 'Y': /* The year. */
295 i = TM_YEAR_BASE; /* just for data sanity... */
296 bp = conv_num(bp, &i, 0, 9999);
297 tm->tm_year = i - TM_YEAR_BASE;
298 LEGAL_ALT(ALT_E);
299 continue;
300
301 case 'y': /* The year within 100 years of the epoch. */
302 /* LEGAL_ALT(ALT_E | ALT_O); */
303 bp = conv_num(bp, &i, 0, 99);
304
305 if (split_year)
306 /* preserve century */
307 i += (tm->tm_year / 100) * 100;
308 else {
309 split_year = 1;
310 if (i <= 68)
311 i = i + 2000 - TM_YEAR_BASE;
312 else
313 i = i + 1900 - TM_YEAR_BASE;
314 }
315 tm->tm_year = i;
316 continue;
317
318 case 'Z':
319 tzset();
320 if (strncmp((const char *)bp, gmt, 3) == 0) {
321 tm->tm_isdst = 0;
322 #ifdef TM_GMTOFF
323 tm->TM_GMTOFF = 0;
324 #endif
325 #ifdef TM_ZONE
326 tm->TM_ZONE = gmt;
327 #endif
328 bp += 3;
329 } else {
330 ep = find_string(bp, &i,
331 (const char * const *)tzname,
332 NULL, 2);
333 if (ep != NULL) {
334 tm->tm_isdst = i;
335 #ifdef TM_GMTOFF
336 tm->TM_GMTOFF = -(timezone);
337 #endif
338 #ifdef TM_ZONE
339 tm->TM_ZONE = tzname[i];
340 #endif
341 }
342 bp = ep;
343 }
344 continue;
345
346 case 'z':
347 /*
348 * We recognize all ISO 8601 formats:
349 * Z = Zulu time/UTC
350 * [+-]hhmm
351 * [+-]hh:mm
352 * [+-]hh
353 * We recognize all RFC-822/RFC-2822 formats:
354 * UT|GMT
355 * North American : UTC offsets
356 * E[DS]T = Eastern : -4 | -5
357 * C[DS]T = Central : -5 | -6
358 * M[DS]T = Mountain: -6 | -7
359 * P[DS]T = Pacific : -7 | -8
360 * Military
361 * [A-IL-M] = -1 ... -9 (J not used)
362 * [N-Y] = +1 ... +12
363 */
364 while (isspace(*bp))
365 bp++;
366
367 switch (*bp++) {
368 case 'G':
369 if (*bp++ != 'M')
370 return NULL;
371 /*FALLTHROUGH*/
372 case 'U':
373 if (*bp++ != 'T')
374 return NULL;
375 /*FALLTHROUGH*/
376 case 'Z':
377 tm->tm_isdst = 0;
378 #ifdef TM_GMTOFF
379 tm->TM_GMTOFF = 0;
380 #endif
381 #ifdef TM_ZONE
382 tm->TM_ZONE = utc;
383 #endif
384 continue;
385 case '+':
386 neg = 0;
387 break;
388 case '-':
389 neg = 1;
390 break;
391 default:
392 --bp;
393 ep = find_string(bp, &i, nast, NULL, 4);
394 if (ep != NULL) {
395 #ifdef TM_GMTOFF
396 tm->TM_GMTOFF = -5 - i;
397 #endif
398 #ifdef TM_ZONE
399 tm->TM_ZONE = __UNCONST(nast[i]);
400 #endif
401 bp = ep;
402 continue;
403 }
404 ep = find_string(bp, &i, nadt, NULL, 4);
405 if (ep != NULL) {
406 tm->tm_isdst = 1;
407 #ifdef TM_GMTOFF
408 tm->TM_GMTOFF = -4 - i;
409 #endif
410 #ifdef TM_ZONE
411 tm->TM_ZONE = __UNCONST(nadt[i]);
412 #endif
413 bp = ep;
414 continue;
415 }
416
417 if ((*bp >= 'A' && *bp <= 'I') ||
418 (*bp >= 'L' && *bp <= 'Y')) {
419 #ifdef TM_GMTOFF
420 /* Argh! No 'J'! */
421 if (*bp >= 'A' && *bp <= 'I')
422 tm->TM_GMTOFF =
423 ('A' - 1) - (int)*bp;
424 else if (*bp >= 'L' && *bp <= 'M')
425 tm->TM_GMTOFF = 'A' - (int)*bp;
426 else if (*bp >= 'N' && *bp <= 'Y')
427 tm->TM_GMTOFF = (int)*bp - 'M';
428 #endif
429 #ifdef TM_ZONE
430 tm->TM_ZONE = NULL; /* XXX */
431 #endif
432 bp++;
433 continue;
434 }
435 return NULL;
436 }
437 offs = 0;
438 for (i = 0; i < 4; ) {
439 if (isdigit(*bp)) {
440 offs = offs * 10 + (*bp++ - '0');
441 i++;
442 continue;
443 }
444 if (i == 2 && *bp == ':') {
445 bp++;
446 continue;
447 }
448 break;
449 }
450 switch (i) {
451 case 2:
452 offs *= 100;
453 break;
454 case 4:
455 i = offs % 100;
456 if (i >= 60)
457 return NULL;
458 /* Convert minutes into decimal */
459 offs = (offs / 100) * 100 + (i * 50) / 30;
460 break;
461 default:
462 return NULL;
463 }
464 if (neg)
465 offs = -offs;
466 tm->tm_isdst = 0; /* XXX */
467 #ifdef TM_GMTOFF
468 tm->TM_GMTOFF = offs;
469 #endif
470 #ifdef TM_ZONE
471 tm->TM_ZONE = NULL; /* XXX */
472 #endif
473 continue;
474
475 /*
476 * Miscellaneous conversions.
477 */
478 case 'n': /* Any kind of white-space. */
479 case 't':
480 while (isspace(*bp))
481 bp++;
482 LEGAL_ALT(0);
483 continue;
484
485
486 default: /* Unknown/unsupported conversion. */
487 return NULL;
488 }
489 }
490
491 return __UNCONST(bp);
492 }
493
494
/*
 * conv_num --
 *	Read an unsigned decimal number from the start of `buf'.
 *
 *	`ulim' bounds both the value and the field width: no more digits
 *	are taken than `ulim' itself has, and scanning stops early once
 *	another digit would necessarily push the value above `ulim'.
 *
 *	On success the value is stored in `*dest' and a pointer to the
 *	first unread character is returned.  NULL is returned, and `*dest'
 *	is left untouched, when `buf' does not start with a digit or the
 *	value lies outside [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cursor = buf;
	uint value = 0;
	/* Loses one decimal digit per digit consumed; zero ends the scan. */
	uint width_left = ulim;

	if (*cursor < '0' || *cursor > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (uint)(*cursor - '0');
		cursor++;
		width_left /= 10;

		if (value * 10 > ulim)
			break;		/* one more digit would overshoot */
		if (width_left == 0)
			break;		/* maximum field width reached */
		if (*cursor < '0' || *cursor > '9')
			break;		/* no more digits in the input */
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = value;
	return cursor;
}
521
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against the first `c'
 *	entries of the name table `n1' and then, if `n2' is not NULL,
 *	against the first `c' entries of `n2'.
 *
 *	The first entry that is a prefix of the input wins: its index is
 *	stored in `*tgt' and a pointer just past the matched text is
 *	returned.  If nothing matches, NULL is returned and `*tgt' is left
 *	untouched.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
    const char * const *n2, int c)
{
	const char * const *tables[2];
	const char * const *names;
	size_t namelen;
	int pass, idx;

	tables[0] = n1;
	tables[1] = n2;

	/* check full name - then abbreviated ones */
	for (pass = 0; pass < 2; pass++) {
		names = tables[pass];
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			namelen = strlen(names[idx]);
			if (strncasecmp(names[idx], (const char *)bp,
			    namelen) != 0)
				continue;
			*tgt = idx;
			return bp + namelen;
		}
	}

	/* Nothing matched */
	return NULL;
}
543