strptime.c revision 1.42 1 /* $NetBSD: strptime.c,v 1.42 2015/07/08 19:48:20 ginsbach Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 * Heavily optimised by David Laight
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.42 2015/07/08 19:48:20 ginsbach Exp $");
35 #endif
36
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <sys/types.h>
40 #include <sys/clock.h>
41 #include <ctype.h>
42 #include <locale.h>
43 #include <string.h>
44 #include <time.h>
45 #include <tzfile.h>
46 #include "private.h"
47 #include "setlocale_local.h"
48
/*
 * When the toolchain provides __weak_alias, publish strptime and
 * strptime_l as weak aliases of _strptime and _strptime_l.
 * NOTE(review): presumably "namespace.h" maps the definitions in this
 * file onto the underscore-prefixed names -- confirm against that header.
 */
#ifdef __weak_alias
__weak_alias(strptime,_strptime)
__weak_alias(strptime_l, _strptime_l)
#endif
53
/*
 * Fetch the LC_TIME entry of a locale's part_impl[] array and view it as
 * a _TimeLocale pointer.  `loc' is evaluated exactly once.
 */
#define _TIME_LOCALE(loc) \
    ((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME]))
56
/*
 * We do not implement alternate representations.  However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * ALT_E and ALT_O are the bits recorded in the parser's local
 * `alt_format' when an `E' or `O' modifier has been seen.
 *
 * LEGAL_ALT(x) makes the enclosing function return NULL when a modifier
 * outside the mask `x' is pending.  It expands to a single statement
 * (do/while) so that it stays correct inside an unbraced if/else.
 */
#define ALT_E		0x01
#define ALT_O		0x02
#define LEGAL_ALT(x)						\
	do {							\
		if (alt_format & ~(x))				\
			return NULL;				\
	} while (/*CONSTCOND*/0)

/*
 * Bits of the parser's `state' word: which struct tm fields have been
 * supplied by the input so far.
 */
#define S_YEAR		(1 << 0)
#define S_MON		(1 << 1)
#define S_YDAY		(1 << 2)
#define S_MDAY		(1 << 3)
#define S_WDAY		(1 << 4)

/*
 * Test a single state bit.  The argument is parenthesised so that an
 * expression such as `a | b' is tested as a whole instead of having
 * only its last operand masked.
 */
#define HAVE_MDAY(s)	((s) & S_MDAY)
#define HAVE_MON(s)	((s) & S_MON)
#define HAVE_WDAY(s)	((s) & S_WDAY)
#define HAVE_YDAY(s)	((s) & S_YDAY)
#define HAVE_YEAR(s)	((s) & S_YEAR)
76
/* Zone names handed back through tm->TM_ZONE for universal time. */
static char gmt[] = "GMT";
static char utc[] = "UTC";

/*
 * RFC-822/RFC-2822 North American zone abbreviations, standard time and
 * daylight time.  The "%z" conversion searches only the first four
 * entries of each table; the fifth entry is a placeholder.
 */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};

static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};
86
/*
 * Parsing helpers defined further down in this file.
 *
 * conv_num:    parse a bounded decimal number from `buf' into `*dest'.
 * find_string: match the input at `bp' against up to two tables of `c'
 *              names each and report the matching index through `*tgt'.
 */
static const unsigned char *conv_num(const unsigned char *buf, int *dest,
    unsigned int llim, unsigned int ulim);
static const unsigned char *find_string(const unsigned char *bp, int *tgt,
    const char * const *n1, const char * const *n2, int c);
90
/*
 * Number of days that precede the first day of each month.  Row 0 is
 * used for common years and row 1 for leap years; the thirteenth column
 * holds the length of the whole year.
 */
static const int start_of_month[2][13] = {
	{	/* common year */
		  0,  31,  59,  90, 120, 151,	/* Jan .. Jun */
		181, 212, 243, 273, 304, 334,	/* Jul .. Dec */
		365				/* whole year */
	},
	{	/* leap year */
		  0,  31,  60,  91, 121, 152,	/* Jan .. Jun */
		182, 213, 244, 274, 305, 335,	/* Jul .. Dec */
		366				/* whole year */
	}
};
95
/*
 * first_wday_of --
 *	Return the day of the week on which January 1st of the full
 *	(four digit) year `yr' falls, numbered like tm_wday (0 = Sunday).
 *
 *	Valid for the Gregorian calendar, which began Sept 14, 1752 in the
 *	UK and its colonies.  Ref:
 *	http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */
static int
first_wday_of(int yr)
{
	const int century = yr / 100;
	const int year_in_century = yr % 100;
	int sum;

	sum = 2 * (3 - century % 4);		/* century contribution */
	sum += year_in_century;			/* one day per elapsed year */
	sum += year_in_century / 4;		/* plus the leap days so far */
	if (is_leap_year(yr))
		sum += 6;			/* January of a leap year */
	sum += 1;

	return sum % 7;
}
109
110 char *
111 strptime(const char *buf, const char *fmt, struct tm *tm)
112 {
113 return strptime_l(buf, fmt, tm, _current_locale());
114 }
115
116 char *
117 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc)
118 {
119 unsigned char c;
120 const unsigned char *bp, *ep;
121 int alt_format, i, split_year = 0, neg = 0, state = 0,
122 day_offset = -1, week_offset = 0, offs;
123 const char *new_fmt;
124
125 bp = (const u_char *)buf;
126
127 while (bp != NULL && (c = *fmt++) != '\0') {
128 /* Clear `alternate' modifier prior to new conversion. */
129 alt_format = 0;
130 i = 0;
131
132 /* Eat up white-space. */
133 if (isspace(c)) {
134 while (isspace(*bp))
135 bp++;
136 continue;
137 }
138
139 if (c != '%')
140 goto literal;
141
142
143 again: switch (c = *fmt++) {
144 case '%': /* "%%" is converted to "%". */
145 literal:
146 if (c != *bp++)
147 return NULL;
148 LEGAL_ALT(0);
149 continue;
150
151 /*
152 * "Alternative" modifiers. Just set the appropriate flag
153 * and start over again.
154 */
155 case 'E': /* "%E?" alternative conversion modifier. */
156 LEGAL_ALT(0);
157 alt_format |= ALT_E;
158 goto again;
159
160 case 'O': /* "%O?" alternative conversion modifier. */
161 LEGAL_ALT(0);
162 alt_format |= ALT_O;
163 goto again;
164
165 /*
166 * "Complex" conversion rules, implemented through recursion.
167 */
168 case 'c': /* Date and time, using the locale's format. */
169 new_fmt = _TIME_LOCALE(loc)->d_t_fmt;
170 state |= S_WDAY | S_MON | S_MDAY | S_YEAR;
171 goto recurse;
172
173 case 'D': /* The date as "%m/%d/%y". */
174 new_fmt = "%m/%d/%y";
175 LEGAL_ALT(0);
176 state |= S_MON | S_MDAY | S_YEAR;
177 goto recurse;
178
179 case 'F': /* The date as "%Y-%m-%d". */
180 new_fmt = "%Y-%m-%d";
181 LEGAL_ALT(0);
182 state |= S_MON | S_MDAY | S_YEAR;
183 goto recurse;
184
185 case 'R': /* The time as "%H:%M". */
186 new_fmt = "%H:%M";
187 LEGAL_ALT(0);
188 goto recurse;
189
190 case 'r': /* The time in 12-hour clock representation. */
191 new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm;
192 LEGAL_ALT(0);
193 goto recurse;
194
195 case 'T': /* The time as "%H:%M:%S". */
196 new_fmt = "%H:%M:%S";
197 LEGAL_ALT(0);
198 goto recurse;
199
200 case 'X': /* The time, using the locale's format. */
201 new_fmt = _TIME_LOCALE(loc)->t_fmt;
202 goto recurse;
203
204 case 'x': /* The date, using the locale's format. */
205 new_fmt = _TIME_LOCALE(loc)->d_fmt;
206 state |= S_MON | S_MDAY | S_YEAR;
207 recurse:
208 bp = (const u_char *)strptime((const char *)bp,
209 new_fmt, tm);
210 LEGAL_ALT(ALT_E);
211 continue;
212
213 /*
214 * "Elementary" conversion rules.
215 */
216 case 'A': /* The day of week, using the locale's form. */
217 case 'a':
218 bp = find_string(bp, &tm->tm_wday,
219 _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7);
220 LEGAL_ALT(0);
221 state |= S_WDAY;
222 continue;
223
224 case 'B': /* The month, using the locale's form. */
225 case 'b':
226 case 'h':
227 bp = find_string(bp, &tm->tm_mon,
228 _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon,
229 12);
230 LEGAL_ALT(0);
231 state |= S_MON;
232 continue;
233
234 case 'C': /* The century number. */
235 i = 20;
236 bp = conv_num(bp, &i, 0, 99);
237
238 i = i * 100 - TM_YEAR_BASE;
239 if (split_year)
240 i += tm->tm_year % 100;
241 split_year = 1;
242 tm->tm_year = i;
243 LEGAL_ALT(ALT_E);
244 state |= S_YEAR;
245 continue;
246
247 case 'd': /* The day of month. */
248 case 'e':
249 bp = conv_num(bp, &tm->tm_mday, 1, 31);
250 LEGAL_ALT(ALT_O);
251 state |= S_MDAY;
252 continue;
253
254 case 'k': /* The hour (24-hour clock representation). */
255 LEGAL_ALT(0);
256 /* FALLTHROUGH */
257 case 'H':
258 bp = conv_num(bp, &tm->tm_hour, 0, 23);
259 LEGAL_ALT(ALT_O);
260 continue;
261
262 case 'l': /* The hour (12-hour clock representation). */
263 LEGAL_ALT(0);
264 /* FALLTHROUGH */
265 case 'I':
266 bp = conv_num(bp, &tm->tm_hour, 1, 12);
267 if (tm->tm_hour == 12)
268 tm->tm_hour = 0;
269 LEGAL_ALT(ALT_O);
270 continue;
271
272 case 'j': /* The day of year. */
273 i = 1;
274 bp = conv_num(bp, &i, 1, 366);
275 tm->tm_yday = i - 1;
276 LEGAL_ALT(0);
277 state |= S_YDAY;
278 continue;
279
280 case 'M': /* The minute. */
281 bp = conv_num(bp, &tm->tm_min, 0, 59);
282 LEGAL_ALT(ALT_O);
283 continue;
284
285 case 'm': /* The month. */
286 i = 1;
287 bp = conv_num(bp, &i, 1, 12);
288 tm->tm_mon = i - 1;
289 LEGAL_ALT(ALT_O);
290 state |= S_MON;
291 continue;
292
293 case 'p': /* The locale's equivalent of AM/PM. */
294 bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm,
295 NULL, 2);
296 if (tm->tm_hour > 11)
297 return NULL;
298 tm->tm_hour += i * 12;
299 LEGAL_ALT(0);
300 continue;
301
302 case 'S': /* The seconds. */
303 bp = conv_num(bp, &tm->tm_sec, 0, 61);
304 LEGAL_ALT(ALT_O);
305 continue;
306
307 #ifndef TIME_MAX
308 #define TIME_MAX INT64_MAX
309 #endif
310 case 's': /* seconds since the epoch */
311 {
312 time_t sse = 0;
313 uint64_t rulim = TIME_MAX;
314
315 if (*bp < '0' || *bp > '9') {
316 bp = NULL;
317 continue;
318 }
319
320 do {
321 sse *= 10;
322 sse += *bp++ - '0';
323 rulim /= 10;
324 } while ((sse * 10 <= TIME_MAX) &&
325 rulim && *bp >= '0' && *bp <= '9');
326
327 if (sse < 0 || (uint64_t)sse > TIME_MAX) {
328 bp = NULL;
329 continue;
330 }
331
332 if (localtime_r(&sse, tm) == NULL)
333 bp = NULL;
334 else
335 state |= S_YDAY | S_WDAY |
336 S_MON | S_MDAY | S_YEAR;
337 }
338 continue;
339
340 case 'U': /* The week of year, beginning on sunday. */
341 case 'W': /* The week of year, beginning on monday. */
342 /*
343 * XXX This is bogus, as we can not assume any valid
344 * information present in the tm structure at this
345 * point to calculate a real value, so just check the
346 * range for now.
347 */
348 bp = conv_num(bp, &i, 0, 53);
349 LEGAL_ALT(ALT_O);
350 if (c == 'U')
351 day_offset = TM_SUNDAY;
352 else
353 day_offset = TM_MONDAY;
354 week_offset = i;
355 continue;
356
357 case 'w': /* The day of week, beginning on sunday. */
358 bp = conv_num(bp, &tm->tm_wday, 0, 6);
359 LEGAL_ALT(ALT_O);
360 state |= S_WDAY;
361 continue;
362
363 case 'u': /* The day of week, monday = 1. */
364 bp = conv_num(bp, &i, 1, 7);
365 tm->tm_wday = i % 7;
366 LEGAL_ALT(ALT_O);
367 continue;
368
369 case 'g': /* The year corresponding to the ISO week
370 * number but without the century.
371 */
372 bp = conv_num(bp, &i, 0, 99);
373 continue;
374
375 case 'G': /* The year corresponding to the ISO week
376 * number with century.
377 */
378 do
379 bp++;
380 while (isdigit(*bp));
381 continue;
382
383 case 'V': /* The ISO 8601:1988 week number as decimal */
384 bp = conv_num(bp, &i, 0, 53);
385 continue;
386
387 case 'Y': /* The year. */
388 i = TM_YEAR_BASE; /* just for data sanity... */
389 bp = conv_num(bp, &i, 0, 9999);
390 tm->tm_year = i - TM_YEAR_BASE;
391 LEGAL_ALT(ALT_E);
392 state |= S_YEAR;
393 continue;
394
395 case 'y': /* The year within 100 years of the epoch. */
396 /* LEGAL_ALT(ALT_E | ALT_O); */
397 bp = conv_num(bp, &i, 0, 99);
398
399 if (split_year)
400 /* preserve century */
401 i += (tm->tm_year / 100) * 100;
402 else {
403 split_year = 1;
404 if (i <= 68)
405 i = i + 2000 - TM_YEAR_BASE;
406 else
407 i = i + 1900 - TM_YEAR_BASE;
408 }
409 tm->tm_year = i;
410 state |= S_YEAR;
411 continue;
412
413 case 'Z':
414 tzset();
415 if (strncmp((const char *)bp, gmt, 3) == 0 ||
416 strncmp((const char *)bp, utc, 3) == 0) {
417 tm->tm_isdst = 0;
418 #ifdef TM_GMTOFF
419 tm->TM_GMTOFF = 0;
420 #endif
421 #ifdef TM_ZONE
422 tm->TM_ZONE = gmt;
423 #endif
424 bp += 3;
425 } else {
426 ep = find_string(bp, &i,
427 (const char * const *)tzname,
428 NULL, 2);
429 if (ep != NULL) {
430 tm->tm_isdst = i;
431 #ifdef TM_GMTOFF
432 tm->TM_GMTOFF = -(timezone);
433 #endif
434 #ifdef TM_ZONE
435 tm->TM_ZONE = tzname[i];
436 #endif
437 }
438 bp = ep;
439 }
440 continue;
441
442 case 'z':
443 /*
444 * We recognize all ISO 8601 formats:
445 * Z = Zulu time/UTC
446 * [+-]hhmm
447 * [+-]hh:mm
448 * [+-]hh
449 * We recognize all RFC-822/RFC-2822 formats:
450 * UT|GMT
451 * North American : UTC offsets
452 * E[DS]T = Eastern : -4 | -5
453 * C[DS]T = Central : -5 | -6
454 * M[DS]T = Mountain: -6 | -7
455 * P[DS]T = Pacific : -7 | -8
456 * Military
457 * [A-IL-M] = -1 ... -9 (J not used)
458 * [N-Y] = +1 ... +12
459 */
460 while (isspace(*bp))
461 bp++;
462
463 switch (*bp++) {
464 case 'G':
465 if (*bp++ != 'M')
466 return NULL;
467 /*FALLTHROUGH*/
468 case 'U':
469 if (*bp++ != 'T')
470 return NULL;
471 /*FALLTHROUGH*/
472 case 'Z':
473 tm->tm_isdst = 0;
474 #ifdef TM_GMTOFF
475 tm->TM_GMTOFF = 0;
476 #endif
477 #ifdef TM_ZONE
478 tm->TM_ZONE = utc;
479 #endif
480 continue;
481 case '+':
482 neg = 0;
483 state |= S_WDAY | S_MON | S_MDAY | S_YEAR;
484 break;
485 case '-':
486 neg = 1;
487 break;
488 default:
489 --bp;
490 ep = find_string(bp, &i, nast, NULL, 4);
491 if (ep != NULL) {
492 #ifdef TM_GMTOFF
493 tm->TM_GMTOFF = -5 - i;
494 #endif
495 #ifdef TM_ZONE
496 tm->TM_ZONE = __UNCONST(nast[i]);
497 #endif
498 bp = ep;
499 continue;
500 }
501 ep = find_string(bp, &i, nadt, NULL, 4);
502 if (ep != NULL) {
503 tm->tm_isdst = 1;
504 #ifdef TM_GMTOFF
505 tm->TM_GMTOFF = -4 - i;
506 #endif
507 #ifdef TM_ZONE
508 tm->TM_ZONE = __UNCONST(nadt[i]);
509 #endif
510 bp = ep;
511 continue;
512 }
513
514 if ((*bp >= 'A' && *bp <= 'I') ||
515 (*bp >= 'L' && *bp <= 'Y')) {
516 #ifdef TM_GMTOFF
517 /* Argh! No 'J'! */
518 if (*bp >= 'A' && *bp <= 'I')
519 tm->TM_GMTOFF =
520 ('A' - 1) - (int)*bp;
521 else if (*bp >= 'L' && *bp <= 'M')
522 tm->TM_GMTOFF = 'A' - (int)*bp;
523 else if (*bp >= 'N' && *bp <= 'Y')
524 tm->TM_GMTOFF = (int)*bp - 'M';
525 #endif
526 #ifdef TM_ZONE
527 tm->TM_ZONE = NULL; /* XXX */
528 #endif
529 bp++;
530 continue;
531 }
532 return NULL;
533 }
534 offs = 0;
535 for (i = 0; i < 4; ) {
536 if (isdigit(*bp)) {
537 offs = offs * 10 + (*bp++ - '0');
538 i++;
539 continue;
540 }
541 if (i == 2 && *bp == ':') {
542 bp++;
543 continue;
544 }
545 break;
546 }
547 switch (i) {
548 case 2:
549 offs *= 100;
550 break;
551 case 4:
552 i = offs % 100;
553 if (i >= 60)
554 return NULL;
555 /* Convert minutes into decimal */
556 offs = (offs / 100) * 100 + (i * 50) / 30;
557 break;
558 default:
559 return NULL;
560 }
561 if (neg)
562 offs = -offs;
563 tm->tm_isdst = 0; /* XXX */
564 #ifdef TM_GMTOFF
565 tm->TM_GMTOFF = offs;
566 #endif
567 #ifdef TM_ZONE
568 tm->TM_ZONE = NULL; /* XXX */
569 #endif
570 continue;
571
572 /*
573 * Miscellaneous conversions.
574 */
575 case 'n': /* Any kind of white-space. */
576 case 't':
577 while (isspace(*bp))
578 bp++;
579 LEGAL_ALT(0);
580 continue;
581
582
583 default: /* Unknown/unsupported conversion. */
584 return NULL;
585 }
586 }
587
588 if (!HAVE_YDAY(state) && HAVE_YEAR(state)) {
589 if (HAVE_MON(state) && HAVE_MDAY(state)) {
590 tm->tm_yday = start_of_month[is_leap_year(tm->tm_year +
591 TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
592 state |= S_YDAY;
593 } else if (day_offset != -1) {
594 /* Set the date to the first Sunday (or Monday)
595 * of the specified week of the year.
596 */
597 if (!HAVE_WDAY(state)) {
598 tm->tm_wday = day_offset;
599 state |= S_WDAY;
600 }
601 tm->tm_yday = (7 -
602 first_wday_of(tm->tm_year + TM_YEAR_BASE) +
603 day_offset) % 7 + (week_offset - 1) * 7 +
604 tm->tm_wday - day_offset;
605 state |= S_YDAY;
606 }
607 }
608
609 if (HAVE_YDAY(state) && HAVE_YEAR(state)) {
610 int isleap;
611 if (!HAVE_MON(state)) {
612 i = 0;
613 isleap = is_leap_year(tm->tm_year + TM_YEAR_BASE);
614 while (tm->tm_yday >= start_of_month[isleap][i])
615 i++;
616 if (i > 12) {
617 i = 1;
618 tm->tm_yday -= start_of_month[isleap][12];
619 tm->tm_year++;
620 }
621 tm->tm_mon = i - 1;
622 state |= S_MON;
623 }
624 if (!HAVE_MDAY(state)) {
625 isleap = is_leap_year(tm->tm_year + TM_YEAR_BASE);
626 tm->tm_mday = tm->tm_yday -
627 start_of_month[isleap][tm->tm_mon] + 1;
628 state |= S_MDAY;
629 }
630 if (!HAVE_WDAY(state)) {
631 i = 0;
632 week_offset = first_wday_of(tm->tm_year);
633 while (i++ <= tm->tm_yday) {
634 if (week_offset++ >= 6)
635 week_offset = 0;
636 }
637 tm->tm_wday = week_offset;
638 state |= S_WDAY;
639 }
640 }
641
642 return __UNCONST(bp);
643 }
644
645
/*
 * conv_num --
 *	Parse an unsigned decimal number at `buf'.
 *
 *	`ulim' bounds the value and also the number of digits consumed: no
 *	more digits are read than `ulim' itself has, and reading stops as
 *	soon as one more digit would be certain to exceed `ulim'.
 *
 *	Returns a pointer to the first character after the number and
 *	stores the value in `*dest'.  Returns NULL, leaving `*dest'
 *	untouched, if `buf' does not start with a digit or the value lies
 *	outside [llim, ulim].
 */
static const unsigned char *
conv_num(const unsigned char *buf, int *dest, unsigned int llim,
    unsigned int ulim)
{
	const unsigned char *cur = buf;
	unsigned int value = 0;
	unsigned int digits_left = ulim;	/* loses a digit per round */

	if (*cur < '0' || *cur > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (unsigned int)(*cur - '0');
		digits_left /= 10;
		cur++;

		if (value * 10 > ulim || digits_left == 0)
			break;
		if (*cur < '0' || *cur > '9')
			break;
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = (int)value;
	return cur;
}
672
/*
 * find_string --
 *	Match the text at `bp', ignoring case, against the `c' names of
 *	table `n1' (full names) and then, if `n2' is not NULL, the `c'
 *	names of table `n2' (abbreviations).  The first name that is a
 *	prefix of the input wins.
 *
 *	Returns a pointer just past the matched name and stores its index
 *	in `*tgt'.  Returns NULL, leaving `*tgt' untouched, if nothing
 *	matches or `n1' is NULL.
 */
static const unsigned char *
find_string(const unsigned char *bp, int *tgt, const char * const *n1,
    const char * const *n2, int c)
{
	const char * const *tables[2];
	size_t which;

	tables[0] = n1;
	tables[1] = n2;

	/* check full name - then abbreviated ones */
	for (which = 0; which < 2; which++) {
		const char * const *names = tables[which];
		int idx;

		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			size_t name_len = strlen(names[idx]);

			if (strncasecmp(names[idx], (const char *)bp,
			    name_len) != 0)
				continue;

			*tgt = idx;
			return bp + name_len;
		}
	}

	/* Nothing matched */
	return NULL;
}
694