strptime.c revision 1.46 1 /* $NetBSD: strptime.c,v 1.46 2015/07/20 14:37:11 ginsbach Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 * Heavily optimised by David Laight
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.46 2015/07/20 14:37:11 ginsbach Exp $");
35 #endif
36
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <sys/types.h>
40 #include <ctype.h>
41 #include <locale.h>
42 #include <string.h>
43 #include <time.h>
44 #include <tzfile.h>
45 #include "private.h"
46 #include "setlocale_local.h"
47
48 #ifdef __weak_alias
49 __weak_alias(strptime,_strptime)
50 __weak_alias(strptime_l, _strptime_l)
51 #endif
52
53 static const u_char *conv_num(const unsigned char *, int *, uint, uint);
54 static const u_char *find_string(const u_char *, int *, const char * const *,
55 const char * const *, int);
56
/* LC_TIME data (_TimeLocale) attached to the locale object `loc'. */
#define _TIME_LOCALE(loc) \
	((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME]))

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * LEGAL_ALT(x) makes the enclosing function return NULL when a modifier
 * outside the set `x' has been seen; it relies on a local `alt_format'.
 * It is wrapped in do/while (0) so that it behaves as a single statement.
 */
#define ALT_E		0x01
#define ALT_O		0x02
#define LEGAL_ALT(x)	\
	do { if (alt_format & ~(x)) return NULL; } while (0)

/* Bits recording which date fields have been supplied so far. */
#define S_YEAR		(1 << 0)
#define S_MON		(1 << 1)
#define S_YDAY		(1 << 2)
#define S_MDAY		(1 << 3)
#define S_WDAY		(1 << 4)

/* Non-zero when the corresponding bit is set in the state word `s'. */
#define HAVE_MDAY(s)	((s) & S_MDAY)
#define HAVE_MON(s)	((s) & S_MON)
#define HAVE_WDAY(s)	((s) & S_WDAY)
#define HAVE_YDAY(s)	((s) & S_YDAY)
#define HAVE_YEAR(s)	((s) & S_YEAR)
79
/*
 * Zone abbreviations handed back through tm->TM_ZONE by strptime_l().
 * They are deliberately not const because they are assigned to that field.
 */
static char gmt[] = "GMT";
static char utc[] = "UTC";

/*
 * RFC-822/RFC-2822 North American zone names, standard and daylight.
 * Only the first four entries of each table are searched; the fifth
 * entry is an empty string.
 */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};
static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};

/*
 * Zero-based day of the year on which each month starts, indexed by
 * [leap year?][month]; index 12 holds the length of the year.
 * Ref: http://en.wikipedia.org/wiki/ISO_week_date
 */
static const int start_of_month[2][13] = {
	/* non-leap year */
	{
		0, 31, 59, 90, 120, 151,
		181, 212, 243, 273, 304, 334,
		365
	},
	/* leap year */
	{
		0, 31, 60, 91, 121, 152,
		182, 213, 244, 274, 305, 335,
		366
	}
};
100
/*
 * Return the day of the week (0 = Sunday ... 6 = Saturday) of
 * January 1st of the year `yr', given with its century (e.g. 2015).
 * The formula holds for the Gregorian calendar, which began
 * Sept 14, 1752 in the UK and its colonies. Ref:
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */
static int
first_wday_of(int yr)
{
	const int century = yr / 100;
	const int yy = yr % 100;
	int sum;

	sum = 2 * (3 - century % 4);	/* century contribution */
	sum += yy + yy / 4;		/* year-within-century contribution */
	if (isleap(yr))
		sum += 6;		/* January adjustment in leap years */
	sum += 1;			/* day of the month: the 1st */

	return sum % 7;
}
114
115 char *
116 strptime(const char *buf, const char *fmt, struct tm *tm)
117 {
118 return strptime_l(buf, fmt, tm, _current_locale());
119 }
120
121 char *
122 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc)
123 {
124 unsigned char c;
125 const unsigned char *bp, *ep;
126 int alt_format, i, split_year = 0, neg = 0, state = 0,
127 day_offset = -1, week_offset = 0, offs;
128 const char *new_fmt;
129
130 bp = (const u_char *)buf;
131
132 while (bp != NULL && (c = *fmt++) != '\0') {
133 /* Clear `alternate' modifier prior to new conversion. */
134 alt_format = 0;
135 i = 0;
136
137 /* Eat up white-space. */
138 if (isspace(c)) {
139 while (isspace(*bp))
140 bp++;
141 continue;
142 }
143
144 if (c != '%')
145 goto literal;
146
147
148 again: switch (c = *fmt++) {
149 case '%': /* "%%" is converted to "%". */
150 literal:
151 if (c != *bp++)
152 return NULL;
153 LEGAL_ALT(0);
154 continue;
155
156 /*
157 * "Alternative" modifiers. Just set the appropriate flag
158 * and start over again.
159 */
160 case 'E': /* "%E?" alternative conversion modifier. */
161 LEGAL_ALT(0);
162 alt_format |= ALT_E;
163 goto again;
164
165 case 'O': /* "%O?" alternative conversion modifier. */
166 LEGAL_ALT(0);
167 alt_format |= ALT_O;
168 goto again;
169
170 /*
171 * "Complex" conversion rules, implemented through recursion.
172 */
173 case 'c': /* Date and time, using the locale's format. */
174 new_fmt = _TIME_LOCALE(loc)->d_t_fmt;
175 state |= S_WDAY | S_MON | S_MDAY | S_YEAR;
176 goto recurse;
177
178 case 'D': /* The date as "%m/%d/%y". */
179 new_fmt = "%m/%d/%y";
180 LEGAL_ALT(0);
181 state |= S_MON | S_MDAY | S_YEAR;
182 goto recurse;
183
184 case 'F': /* The date as "%Y-%m-%d". */
185 new_fmt = "%Y-%m-%d";
186 LEGAL_ALT(0);
187 state |= S_MON | S_MDAY | S_YEAR;
188 goto recurse;
189
190 case 'R': /* The time as "%H:%M". */
191 new_fmt = "%H:%M";
192 LEGAL_ALT(0);
193 goto recurse;
194
195 case 'r': /* The time in 12-hour clock representation. */
196 new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm;
197 LEGAL_ALT(0);
198 goto recurse;
199
200 case 'T': /* The time as "%H:%M:%S". */
201 new_fmt = "%H:%M:%S";
202 LEGAL_ALT(0);
203 goto recurse;
204
205 case 'X': /* The time, using the locale's format. */
206 new_fmt = _TIME_LOCALE(loc)->t_fmt;
207 goto recurse;
208
209 case 'x': /* The date, using the locale's format. */
210 new_fmt = _TIME_LOCALE(loc)->d_fmt;
211 state |= S_MON | S_MDAY | S_YEAR;
212 recurse:
213 bp = (const u_char *)strptime((const char *)bp,
214 new_fmt, tm);
215 LEGAL_ALT(ALT_E);
216 continue;
217
218 /*
219 * "Elementary" conversion rules.
220 */
221 case 'A': /* The day of week, using the locale's form. */
222 case 'a':
223 bp = find_string(bp, &tm->tm_wday,
224 _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7);
225 LEGAL_ALT(0);
226 state |= S_WDAY;
227 continue;
228
229 case 'B': /* The month, using the locale's form. */
230 case 'b':
231 case 'h':
232 bp = find_string(bp, &tm->tm_mon,
233 _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon,
234 12);
235 LEGAL_ALT(0);
236 state |= S_MON;
237 continue;
238
239 case 'C': /* The century number. */
240 i = 20;
241 bp = conv_num(bp, &i, 0, 99);
242
243 i = i * 100 - TM_YEAR_BASE;
244 if (split_year)
245 i += tm->tm_year % 100;
246 split_year = 1;
247 tm->tm_year = i;
248 LEGAL_ALT(ALT_E);
249 state |= S_YEAR;
250 continue;
251
252 case 'd': /* The day of month. */
253 case 'e':
254 bp = conv_num(bp, &tm->tm_mday, 1, 31);
255 LEGAL_ALT(ALT_O);
256 state |= S_MDAY;
257 continue;
258
259 case 'k': /* The hour (24-hour clock representation). */
260 LEGAL_ALT(0);
261 /* FALLTHROUGH */
262 case 'H':
263 bp = conv_num(bp, &tm->tm_hour, 0, 23);
264 LEGAL_ALT(ALT_O);
265 continue;
266
267 case 'l': /* The hour (12-hour clock representation). */
268 LEGAL_ALT(0);
269 /* FALLTHROUGH */
270 case 'I':
271 bp = conv_num(bp, &tm->tm_hour, 1, 12);
272 if (tm->tm_hour == 12)
273 tm->tm_hour = 0;
274 LEGAL_ALT(ALT_O);
275 continue;
276
277 case 'j': /* The day of year. */
278 i = 1;
279 bp = conv_num(bp, &i, 1, 366);
280 tm->tm_yday = i - 1;
281 LEGAL_ALT(0);
282 state |= S_YDAY;
283 continue;
284
285 case 'M': /* The minute. */
286 bp = conv_num(bp, &tm->tm_min, 0, 59);
287 LEGAL_ALT(ALT_O);
288 continue;
289
290 case 'm': /* The month. */
291 i = 1;
292 bp = conv_num(bp, &i, 1, 12);
293 tm->tm_mon = i - 1;
294 LEGAL_ALT(ALT_O);
295 state |= S_MON;
296 continue;
297
298 case 'p': /* The locale's equivalent of AM/PM. */
299 bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm,
300 NULL, 2);
301 if (tm->tm_hour > 11)
302 return NULL;
303 tm->tm_hour += i * 12;
304 LEGAL_ALT(0);
305 continue;
306
307 case 'S': /* The seconds. */
308 bp = conv_num(bp, &tm->tm_sec, 0, 61);
309 LEGAL_ALT(ALT_O);
310 continue;
311
312 #ifndef TIME_MAX
313 #define TIME_MAX INT64_MAX
314 #endif
315 case 's': /* seconds since the epoch */
316 {
317 time_t sse = 0;
318 uint64_t rulim = TIME_MAX;
319
320 if (*bp < '0' || *bp > '9') {
321 bp = NULL;
322 continue;
323 }
324
325 do {
326 sse *= 10;
327 sse += *bp++ - '0';
328 rulim /= 10;
329 } while ((sse * 10 <= TIME_MAX) &&
330 rulim && *bp >= '0' && *bp <= '9');
331
332 if (sse < 0 || (uint64_t)sse > TIME_MAX) {
333 bp = NULL;
334 continue;
335 }
336
337 if (localtime_r(&sse, tm) == NULL)
338 bp = NULL;
339 else
340 state |= S_YDAY | S_WDAY |
341 S_MON | S_MDAY | S_YEAR;
342 }
343 continue;
344
345 case 'U': /* The week of year, beginning on sunday. */
346 case 'W': /* The week of year, beginning on monday. */
347 /*
348 * XXX This is bogus, as we can not assume any valid
349 * information present in the tm structure at this
350 * point to calculate a real value, so just check the
351 * range for now.
352 */
353 bp = conv_num(bp, &i, 0, 53);
354 LEGAL_ALT(ALT_O);
355 if (c == 'U')
356 day_offset = TM_SUNDAY;
357 else
358 day_offset = TM_MONDAY;
359 week_offset = i;
360 continue;
361
362 case 'w': /* The day of week, beginning on sunday. */
363 bp = conv_num(bp, &tm->tm_wday, 0, 6);
364 LEGAL_ALT(ALT_O);
365 state |= S_WDAY;
366 continue;
367
368 case 'u': /* The day of week, monday = 1. */
369 bp = conv_num(bp, &i, 1, 7);
370 tm->tm_wday = i % 7;
371 LEGAL_ALT(ALT_O);
372 state |= S_WDAY;
373 continue;
374
375 case 'g': /* The year corresponding to the ISO week
376 * number but without the century.
377 */
378 bp = conv_num(bp, &i, 0, 99);
379 continue;
380
381 case 'G': /* The year corresponding to the ISO week
382 * number with century.
383 */
384 do
385 bp++;
386 while (isdigit(*bp));
387 continue;
388
389 case 'V': /* The ISO 8601:1988 week number as decimal */
390 bp = conv_num(bp, &i, 0, 53);
391 continue;
392
393 case 'Y': /* The year. */
394 i = TM_YEAR_BASE; /* just for data sanity... */
395 bp = conv_num(bp, &i, 0, 9999);
396 tm->tm_year = i - TM_YEAR_BASE;
397 LEGAL_ALT(ALT_E);
398 state |= S_YEAR;
399 continue;
400
401 case 'y': /* The year within 100 years of the epoch. */
402 /* LEGAL_ALT(ALT_E | ALT_O); */
403 bp = conv_num(bp, &i, 0, 99);
404
405 if (split_year)
406 /* preserve century */
407 i += (tm->tm_year / 100) * 100;
408 else {
409 split_year = 1;
410 if (i <= 68)
411 i = i + 2000 - TM_YEAR_BASE;
412 else
413 i = i + 1900 - TM_YEAR_BASE;
414 }
415 tm->tm_year = i;
416 state |= S_YEAR;
417 continue;
418
419 case 'Z':
420 tzset();
421 if (strncmp((const char *)bp, gmt, 3) == 0 ||
422 strncmp((const char *)bp, utc, 3) == 0) {
423 tm->tm_isdst = 0;
424 #ifdef TM_GMTOFF
425 tm->TM_GMTOFF = 0;
426 #endif
427 #ifdef TM_ZONE
428 tm->TM_ZONE = gmt;
429 #endif
430 bp += 3;
431 } else {
432 ep = find_string(bp, &i,
433 (const char * const *)tzname,
434 NULL, 2);
435 if (ep != NULL) {
436 tm->tm_isdst = i;
437 #ifdef TM_GMTOFF
438 tm->TM_GMTOFF = -(timezone);
439 #endif
440 #ifdef TM_ZONE
441 tm->TM_ZONE = tzname[i];
442 #endif
443 }
444 bp = ep;
445 }
446 continue;
447
448 case 'z':
449 /*
450 * We recognize all ISO 8601 formats:
451 * Z = Zulu time/UTC
452 * [+-]hhmm
453 * [+-]hh:mm
454 * [+-]hh
455 * We recognize all RFC-822/RFC-2822 formats:
456 * UT|GMT
457 * North American : UTC offsets
458 * E[DS]T = Eastern : -4 | -5
459 * C[DS]T = Central : -5 | -6
460 * M[DS]T = Mountain: -6 | -7
461 * P[DS]T = Pacific : -7 | -8
462 * Military
463 * [A-IL-M] = -1 ... -9 (J not used)
464 * [N-Y] = +1 ... +12
465 */
466 while (isspace(*bp))
467 bp++;
468
469 switch (*bp++) {
470 case 'G':
471 if (*bp++ != 'M')
472 return NULL;
473 /*FALLTHROUGH*/
474 case 'U':
475 if (*bp++ != 'T')
476 return NULL;
477 /*FALLTHROUGH*/
478 case 'Z':
479 tm->tm_isdst = 0;
480 #ifdef TM_GMTOFF
481 tm->TM_GMTOFF = 0;
482 #endif
483 #ifdef TM_ZONE
484 tm->TM_ZONE = utc;
485 #endif
486 continue;
487 case '+':
488 neg = 0;
489 break;
490 case '-':
491 neg = 1;
492 break;
493 default:
494 --bp;
495 ep = find_string(bp, &i, nast, NULL, 4);
496 if (ep != NULL) {
497 #ifdef TM_GMTOFF
498 tm->TM_GMTOFF = -5 - i;
499 #endif
500 #ifdef TM_ZONE
501 tm->TM_ZONE = __UNCONST(nast[i]);
502 #endif
503 bp = ep;
504 continue;
505 }
506 ep = find_string(bp, &i, nadt, NULL, 4);
507 if (ep != NULL) {
508 tm->tm_isdst = 1;
509 #ifdef TM_GMTOFF
510 tm->TM_GMTOFF = -4 - i;
511 #endif
512 #ifdef TM_ZONE
513 tm->TM_ZONE = __UNCONST(nadt[i]);
514 #endif
515 bp = ep;
516 continue;
517 }
518
519 if ((*bp >= 'A' && *bp <= 'I') ||
520 (*bp >= 'L' && *bp <= 'Y')) {
521 #ifdef TM_GMTOFF
522 /* Argh! No 'J'! */
523 if (*bp >= 'A' && *bp <= 'I')
524 tm->TM_GMTOFF =
525 ('A' - 1) - (int)*bp;
526 else if (*bp >= 'L' && *bp <= 'M')
527 tm->TM_GMTOFF = 'A' - (int)*bp;
528 else if (*bp >= 'N' && *bp <= 'Y')
529 tm->TM_GMTOFF = (int)*bp - 'M';
530 #endif
531 #ifdef TM_ZONE
532 tm->TM_ZONE = NULL; /* XXX */
533 #endif
534 bp++;
535 continue;
536 }
537 return NULL;
538 }
539 offs = 0;
540 for (i = 0; i < 4; ) {
541 if (isdigit(*bp)) {
542 offs = offs * 10 + (*bp++ - '0');
543 i++;
544 continue;
545 }
546 if (i == 2 && *bp == ':') {
547 bp++;
548 continue;
549 }
550 break;
551 }
552 switch (i) {
553 case 2:
554 offs *= 100;
555 break;
556 case 4:
557 i = offs % 100;
558 if (i >= 60)
559 return NULL;
560 /* Convert minutes into decimal */
561 offs = (offs / 100) * 100 + (i * 50) / 30;
562 break;
563 default:
564 return NULL;
565 }
566 if (neg)
567 offs = -offs;
568 tm->tm_isdst = 0; /* XXX */
569 #ifdef TM_GMTOFF
570 tm->TM_GMTOFF = offs;
571 #endif
572 #ifdef TM_ZONE
573 tm->TM_ZONE = NULL; /* XXX */
574 #endif
575 continue;
576
577 /*
578 * Miscellaneous conversions.
579 */
580 case 'n': /* Any kind of white-space. */
581 case 't':
582 while (isspace(*bp))
583 bp++;
584 LEGAL_ALT(0);
585 continue;
586
587
588 default: /* Unknown/unsupported conversion. */
589 return NULL;
590 }
591 }
592
593 if (!HAVE_YDAY(state) && HAVE_YEAR(state)) {
594 if (HAVE_MON(state) && HAVE_MDAY(state)) {
595 /* calculate day of year (ordinal date) */
596 tm->tm_yday = start_of_month[isleap_sum(tm->tm_year,
597 TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
598 state |= S_YDAY;
599 } else if (day_offset != -1) {
600 /*
601 * Set the date to the first Sunday (or Monday)
602 * of the specified week of the year.
603 */
604 if (!HAVE_WDAY(state)) {
605 tm->tm_wday = day_offset;
606 state |= S_WDAY;
607 }
608 tm->tm_yday = (7 -
609 first_wday_of(tm->tm_year + TM_YEAR_BASE) +
610 day_offset) % 7 + (week_offset - 1) * 7 +
611 tm->tm_wday - day_offset;
612 state |= S_YDAY;
613 }
614 }
615
616 if (HAVE_YDAY(state) && HAVE_YEAR(state)) {
617 int isleap;
618
619 if (!HAVE_MON(state)) {
620 /* calculate month of day of year */
621 i = 0;
622 isleap = isleap_sum(tm->tm_year, TM_YEAR_BASE);
623 while (tm->tm_yday >= start_of_month[isleap][i])
624 i++;
625 if (i > 12) {
626 i = 1;
627 tm->tm_yday -= start_of_month[isleap][12];
628 tm->tm_year++;
629 }
630 tm->tm_mon = i - 1;
631 state |= S_MON;
632 }
633
634 if (!HAVE_MDAY(state)) {
635 /* calculate day of month */
636 isleap = isleap_sum(tm->tm_year, TM_YEAR_BASE);
637 tm->tm_mday = tm->tm_yday -
638 start_of_month[isleap][tm->tm_mon] + 1;
639 state |= S_MDAY;
640 }
641
642 if (!HAVE_WDAY(state)) {
643 /* calculate day of week */
644 i = 0;
645 week_offset = first_wday_of(tm->tm_year);
646 while (i++ <= tm->tm_yday) {
647 if (week_offset++ >= 6)
648 week_offset = 0;
649 }
650 tm->tm_wday = week_offset;
651 state |= S_WDAY;
652 }
653 }
654
655 return __UNCONST(bp);
656 }
657
658
659 static const u_char *
660 conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
661 {
662 uint result = 0;
663 unsigned char ch;
664
665 /* The limit also determines the number of valid digits. */
666 uint rulim = ulim;
667
668 ch = *buf;
669 if (ch < '0' || ch > '9')
670 return NULL;
671
672 do {
673 result *= 10;
674 result += ch - '0';
675 rulim /= 10;
676 ch = *++buf;
677 } while ((result * 10 <= ulim) && rulim && ch >= '0' && ch <= '9');
678
679 if (result < llim || result > ulim)
680 return NULL;
681
682 *dest = result;
683 return buf;
684 }
685
/*
 * Look for one of the names in `n1' at the start of `bp', ignoring
 * case; when none matches and `n2' is not NULL, try `n2' the same way.
 * Each table holds `c' strings and the first matching entry wins.
 *
 * On a match the entry's index is stored in `*tgt' and a pointer just
 * past the matched text is returned. Otherwise NULL is returned and
 * `*tgt' is left untouched. Nothing is searched when `n1' is NULL.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
    const char * const *n2, int c)
{
	/* check full name - then abbreviated ones */
	const char * const *lists[2] = { n1, n2 };
	size_t which;

	for (which = 0; which < 2 && lists[which] != NULL; which++) {
		const char * const *names = lists[which];
		int idx;

		for (idx = 0; idx < c; idx++) {
			const char *name = names[idx];
			size_t len = strlen(name);

			if (strncasecmp(name, (const char *)bp, len) != 0)
				continue;
			*tgt = idx;
			return bp + len;
		}
	}

	/* Nothing matched */
	return NULL;
}
707