strptime.c revision 1.43 1 /* $NetBSD: strptime.c,v 1.43 2015/07/13 17:45:16 ginsbach Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 * Heavily optimised by David Laight
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.43 2015/07/13 17:45:16 ginsbach Exp $");
35 #endif
36
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <sys/types.h>
40 #include <ctype.h>
41 #include <locale.h>
42 #include <string.h>
43 #include <time.h>
44 #include <tzfile.h>
45 #include "private.h"
46 #include "setlocale_local.h"
47
48 #ifdef __weak_alias
49 __weak_alias(strptime,_strptime)
50 __weak_alias(strptime_l, _strptime_l)
51 #endif
52
/*
 * Fetch the LC_TIME entry of locale object `loc', viewed as a
 * _TimeLocale.
 */
#define _TIME_LOCALE(loc) \
    ((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME]))

/*
 * We do not implement alternate representations.  However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * ALT_E and ALT_O are the bits recorded in `alt_format' for the "%E"
 * and "%O" modifiers.  LEGAL_ALT(x) makes the enclosing function return
 * NULL when `alt_format' contains a modifier bit that is not in `x'.
 * It expects a local variable named `alt_format' and is wrapped in
 * do { } while (0) so that "LEGAL_ALT(x);" behaves as one statement,
 * even as the body of an unbraced if/else.
 */
#define ALT_E			0x01
#define ALT_O			0x02
#define LEGAL_ALT(x) \
	do { if (alt_format & ~(x)) return NULL; } while (0)
63
/*
 * Bits kept in the parser's `state' word, one per date field that has
 * been filled in so far.
 */
#define S_YEAR			(1 << 0)
#define S_MON			(1 << 1)
#define S_YDAY			(1 << 2)
#define S_MDAY			(1 << 3)
#define S_WDAY			(1 << 4)

/*
 * Test whether the corresponding field bit is set in `s'.  The argument
 * is parenthesised so that an expression such as (a | b) is tested as a
 * whole instead of being split by operator precedence.
 */
#define HAVE_MDAY(s)		((s) & S_MDAY)
#define HAVE_MON(s)		((s) & S_MON)
#define HAVE_WDAY(s)		((s) & S_WDAY)
#define HAVE_YDAY(s)		((s) & S_YDAY)
#define HAVE_YEAR(s)		((s) & S_YEAR)
75
/*
 * Zone names assigned to tm->TM_ZONE by the %Z and %z conversions.
 * They are writable arrays rather than string literals because they are
 * stored through that member without a cast.
 */
static char gmt[] = "GMT";
static char utc[] = "UTC";

/*
 * RFC-822/RFC-2822 North American zone abbreviations, listed from east
 * to west: %z turns index i into an offset of -5 - i hours for the
 * standard names and -4 - i hours for the daylight names.  The fifth
 * slot of each table is an empty placeholder; %z only searches the
 * first four.
 */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};
static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};
85
/* Forward declarations of the parsing helpers defined after strptime_l(). */
static const unsigned char *conv_num(const unsigned char *buf, int *dest,
    unsigned int llim, unsigned int ulim);
static const unsigned char *find_string(const unsigned char *bp, int *tgt,
    const char * const *n1, const char * const *n2, int c);
89
/*
 * Zero-based day of the year on which each month starts; row 0 is a
 * common year and row 1 a leap year.  The 13th column holds the length
 * of the year and is used to detect a day of year that runs past
 * December.
 */
static const int start_of_month[2][13] = {
	/* common year */
	{   0,  31,  59,  90, 120, 151,
	  181, 212, 243, 273, 304, 334, 365 },
	/* leap year */
	{   0,  31,  60,  91, 121, 152,
	  182, 213, 244, 274, 305, 335, 366 }
};
94
/*
 * Return the day of the week (0 = Sunday ... 6 = Saturday) on which
 * January 1st of the full calendar year `yr' (e.g. 2015, not 115)
 * falls.  Valid for the Gregorian calendar, which began Sept 14, 1752
 * in the UK and its colonies.  Ref:
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */

static int
first_wday_of(int yr)
{
	const int century = yr / 100;
	const int year_in_century = yr % 100;
	int sum;

	sum = 2 * (3 - century % 4);		/* century term */
	sum += year_in_century;			/* one day per year ... */
	sum += year_in_century / 4;		/* ... plus leap days so far */
	if (isleap(yr))
		sum += 6;			/* leap-year correction */
	sum += 1;

	return sum % 7;
}
108
109 char *
110 strptime(const char *buf, const char *fmt, struct tm *tm)
111 {
112 return strptime_l(buf, fmt, tm, _current_locale());
113 }
114
115 char *
116 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc)
117 {
118 unsigned char c;
119 const unsigned char *bp, *ep;
120 int alt_format, i, split_year = 0, neg = 0, state = 0,
121 day_offset = -1, week_offset = 0, offs;
122 const char *new_fmt;
123
124 bp = (const u_char *)buf;
125
126 while (bp != NULL && (c = *fmt++) != '\0') {
127 /* Clear `alternate' modifier prior to new conversion. */
128 alt_format = 0;
129 i = 0;
130
131 /* Eat up white-space. */
132 if (isspace(c)) {
133 while (isspace(*bp))
134 bp++;
135 continue;
136 }
137
138 if (c != '%')
139 goto literal;
140
141
142 again: switch (c = *fmt++) {
143 case '%': /* "%%" is converted to "%". */
144 literal:
145 if (c != *bp++)
146 return NULL;
147 LEGAL_ALT(0);
148 continue;
149
150 /*
151 * "Alternative" modifiers. Just set the appropriate flag
152 * and start over again.
153 */
154 case 'E': /* "%E?" alternative conversion modifier. */
155 LEGAL_ALT(0);
156 alt_format |= ALT_E;
157 goto again;
158
159 case 'O': /* "%O?" alternative conversion modifier. */
160 LEGAL_ALT(0);
161 alt_format |= ALT_O;
162 goto again;
163
164 /*
165 * "Complex" conversion rules, implemented through recursion.
166 */
167 case 'c': /* Date and time, using the locale's format. */
168 new_fmt = _TIME_LOCALE(loc)->d_t_fmt;
169 state |= S_WDAY | S_MON | S_MDAY | S_YEAR;
170 goto recurse;
171
172 case 'D': /* The date as "%m/%d/%y". */
173 new_fmt = "%m/%d/%y";
174 LEGAL_ALT(0);
175 state |= S_MON | S_MDAY | S_YEAR;
176 goto recurse;
177
178 case 'F': /* The date as "%Y-%m-%d". */
179 new_fmt = "%Y-%m-%d";
180 LEGAL_ALT(0);
181 state |= S_MON | S_MDAY | S_YEAR;
182 goto recurse;
183
184 case 'R': /* The time as "%H:%M". */
185 new_fmt = "%H:%M";
186 LEGAL_ALT(0);
187 goto recurse;
188
189 case 'r': /* The time in 12-hour clock representation. */
190 new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm;
191 LEGAL_ALT(0);
192 goto recurse;
193
194 case 'T': /* The time as "%H:%M:%S". */
195 new_fmt = "%H:%M:%S";
196 LEGAL_ALT(0);
197 goto recurse;
198
199 case 'X': /* The time, using the locale's format. */
200 new_fmt = _TIME_LOCALE(loc)->t_fmt;
201 goto recurse;
202
203 case 'x': /* The date, using the locale's format. */
204 new_fmt = _TIME_LOCALE(loc)->d_fmt;
205 state |= S_MON | S_MDAY | S_YEAR;
206 recurse:
207 bp = (const u_char *)strptime((const char *)bp,
208 new_fmt, tm);
209 LEGAL_ALT(ALT_E);
210 continue;
211
212 /*
213 * "Elementary" conversion rules.
214 */
215 case 'A': /* The day of week, using the locale's form. */
216 case 'a':
217 bp = find_string(bp, &tm->tm_wday,
218 _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7);
219 LEGAL_ALT(0);
220 state |= S_WDAY;
221 continue;
222
223 case 'B': /* The month, using the locale's form. */
224 case 'b':
225 case 'h':
226 bp = find_string(bp, &tm->tm_mon,
227 _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon,
228 12);
229 LEGAL_ALT(0);
230 state |= S_MON;
231 continue;
232
233 case 'C': /* The century number. */
234 i = 20;
235 bp = conv_num(bp, &i, 0, 99);
236
237 i = i * 100 - TM_YEAR_BASE;
238 if (split_year)
239 i += tm->tm_year % 100;
240 split_year = 1;
241 tm->tm_year = i;
242 LEGAL_ALT(ALT_E);
243 state |= S_YEAR;
244 continue;
245
246 case 'd': /* The day of month. */
247 case 'e':
248 bp = conv_num(bp, &tm->tm_mday, 1, 31);
249 LEGAL_ALT(ALT_O);
250 state |= S_MDAY;
251 continue;
252
253 case 'k': /* The hour (24-hour clock representation). */
254 LEGAL_ALT(0);
255 /* FALLTHROUGH */
256 case 'H':
257 bp = conv_num(bp, &tm->tm_hour, 0, 23);
258 LEGAL_ALT(ALT_O);
259 continue;
260
261 case 'l': /* The hour (12-hour clock representation). */
262 LEGAL_ALT(0);
263 /* FALLTHROUGH */
264 case 'I':
265 bp = conv_num(bp, &tm->tm_hour, 1, 12);
266 if (tm->tm_hour == 12)
267 tm->tm_hour = 0;
268 LEGAL_ALT(ALT_O);
269 continue;
270
271 case 'j': /* The day of year. */
272 i = 1;
273 bp = conv_num(bp, &i, 1, 366);
274 tm->tm_yday = i - 1;
275 LEGAL_ALT(0);
276 state |= S_YDAY;
277 continue;
278
279 case 'M': /* The minute. */
280 bp = conv_num(bp, &tm->tm_min, 0, 59);
281 LEGAL_ALT(ALT_O);
282 continue;
283
284 case 'm': /* The month. */
285 i = 1;
286 bp = conv_num(bp, &i, 1, 12);
287 tm->tm_mon = i - 1;
288 LEGAL_ALT(ALT_O);
289 state |= S_MON;
290 continue;
291
292 case 'p': /* The locale's equivalent of AM/PM. */
293 bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm,
294 NULL, 2);
295 if (tm->tm_hour > 11)
296 return NULL;
297 tm->tm_hour += i * 12;
298 LEGAL_ALT(0);
299 continue;
300
301 case 'S': /* The seconds. */
302 bp = conv_num(bp, &tm->tm_sec, 0, 61);
303 LEGAL_ALT(ALT_O);
304 continue;
305
306 #ifndef TIME_MAX
307 #define TIME_MAX INT64_MAX
308 #endif
309 case 's': /* seconds since the epoch */
310 {
311 time_t sse = 0;
312 uint64_t rulim = TIME_MAX;
313
314 if (*bp < '0' || *bp > '9') {
315 bp = NULL;
316 continue;
317 }
318
319 do {
320 sse *= 10;
321 sse += *bp++ - '0';
322 rulim /= 10;
323 } while ((sse * 10 <= TIME_MAX) &&
324 rulim && *bp >= '0' && *bp <= '9');
325
326 if (sse < 0 || (uint64_t)sse > TIME_MAX) {
327 bp = NULL;
328 continue;
329 }
330
331 if (localtime_r(&sse, tm) == NULL)
332 bp = NULL;
333 else
334 state |= S_YDAY | S_WDAY |
335 S_MON | S_MDAY | S_YEAR;
336 }
337 continue;
338
339 case 'U': /* The week of year, beginning on sunday. */
340 case 'W': /* The week of year, beginning on monday. */
341 /*
342 * XXX This is bogus, as we can not assume any valid
343 * information present in the tm structure at this
344 * point to calculate a real value, so just check the
345 * range for now.
346 */
347 bp = conv_num(bp, &i, 0, 53);
348 LEGAL_ALT(ALT_O);
349 if (c == 'U')
350 day_offset = TM_SUNDAY;
351 else
352 day_offset = TM_MONDAY;
353 week_offset = i;
354 continue;
355
356 case 'w': /* The day of week, beginning on sunday. */
357 bp = conv_num(bp, &tm->tm_wday, 0, 6);
358 LEGAL_ALT(ALT_O);
359 state |= S_WDAY;
360 continue;
361
362 case 'u': /* The day of week, monday = 1. */
363 bp = conv_num(bp, &i, 1, 7);
364 tm->tm_wday = i % 7;
365 LEGAL_ALT(ALT_O);
366 continue;
367
368 case 'g': /* The year corresponding to the ISO week
369 * number but without the century.
370 */
371 bp = conv_num(bp, &i, 0, 99);
372 continue;
373
374 case 'G': /* The year corresponding to the ISO week
375 * number with century.
376 */
377 do
378 bp++;
379 while (isdigit(*bp));
380 continue;
381
382 case 'V': /* The ISO 8601:1988 week number as decimal */
383 bp = conv_num(bp, &i, 0, 53);
384 continue;
385
386 case 'Y': /* The year. */
387 i = TM_YEAR_BASE; /* just for data sanity... */
388 bp = conv_num(bp, &i, 0, 9999);
389 tm->tm_year = i - TM_YEAR_BASE;
390 LEGAL_ALT(ALT_E);
391 state |= S_YEAR;
392 continue;
393
394 case 'y': /* The year within 100 years of the epoch. */
395 /* LEGAL_ALT(ALT_E | ALT_O); */
396 bp = conv_num(bp, &i, 0, 99);
397
398 if (split_year)
399 /* preserve century */
400 i += (tm->tm_year / 100) * 100;
401 else {
402 split_year = 1;
403 if (i <= 68)
404 i = i + 2000 - TM_YEAR_BASE;
405 else
406 i = i + 1900 - TM_YEAR_BASE;
407 }
408 tm->tm_year = i;
409 state |= S_YEAR;
410 continue;
411
412 case 'Z':
413 tzset();
414 if (strncmp((const char *)bp, gmt, 3) == 0 ||
415 strncmp((const char *)bp, utc, 3) == 0) {
416 tm->tm_isdst = 0;
417 #ifdef TM_GMTOFF
418 tm->TM_GMTOFF = 0;
419 #endif
420 #ifdef TM_ZONE
421 tm->TM_ZONE = gmt;
422 #endif
423 bp += 3;
424 } else {
425 ep = find_string(bp, &i,
426 (const char * const *)tzname,
427 NULL, 2);
428 if (ep != NULL) {
429 tm->tm_isdst = i;
430 #ifdef TM_GMTOFF
431 tm->TM_GMTOFF = -(timezone);
432 #endif
433 #ifdef TM_ZONE
434 tm->TM_ZONE = tzname[i];
435 #endif
436 }
437 bp = ep;
438 }
439 continue;
440
441 case 'z':
442 /*
443 * We recognize all ISO 8601 formats:
444 * Z = Zulu time/UTC
445 * [+-]hhmm
446 * [+-]hh:mm
447 * [+-]hh
448 * We recognize all RFC-822/RFC-2822 formats:
449 * UT|GMT
450 * North American : UTC offsets
451 * E[DS]T = Eastern : -4 | -5
452 * C[DS]T = Central : -5 | -6
453 * M[DS]T = Mountain: -6 | -7
454 * P[DS]T = Pacific : -7 | -8
455 * Military
456 * [A-IL-M] = -1 ... -9 (J not used)
457 * [N-Y] = +1 ... +12
458 */
459 while (isspace(*bp))
460 bp++;
461
462 switch (*bp++) {
463 case 'G':
464 if (*bp++ != 'M')
465 return NULL;
466 /*FALLTHROUGH*/
467 case 'U':
468 if (*bp++ != 'T')
469 return NULL;
470 /*FALLTHROUGH*/
471 case 'Z':
472 tm->tm_isdst = 0;
473 #ifdef TM_GMTOFF
474 tm->TM_GMTOFF = 0;
475 #endif
476 #ifdef TM_ZONE
477 tm->TM_ZONE = utc;
478 #endif
479 continue;
480 case '+':
481 neg = 0;
482 state |= S_WDAY | S_MON | S_MDAY | S_YEAR;
483 break;
484 case '-':
485 neg = 1;
486 break;
487 default:
488 --bp;
489 ep = find_string(bp, &i, nast, NULL, 4);
490 if (ep != NULL) {
491 #ifdef TM_GMTOFF
492 tm->TM_GMTOFF = -5 - i;
493 #endif
494 #ifdef TM_ZONE
495 tm->TM_ZONE = __UNCONST(nast[i]);
496 #endif
497 bp = ep;
498 continue;
499 }
500 ep = find_string(bp, &i, nadt, NULL, 4);
501 if (ep != NULL) {
502 tm->tm_isdst = 1;
503 #ifdef TM_GMTOFF
504 tm->TM_GMTOFF = -4 - i;
505 #endif
506 #ifdef TM_ZONE
507 tm->TM_ZONE = __UNCONST(nadt[i]);
508 #endif
509 bp = ep;
510 continue;
511 }
512
513 if ((*bp >= 'A' && *bp <= 'I') ||
514 (*bp >= 'L' && *bp <= 'Y')) {
515 #ifdef TM_GMTOFF
516 /* Argh! No 'J'! */
517 if (*bp >= 'A' && *bp <= 'I')
518 tm->TM_GMTOFF =
519 ('A' - 1) - (int)*bp;
520 else if (*bp >= 'L' && *bp <= 'M')
521 tm->TM_GMTOFF = 'A' - (int)*bp;
522 else if (*bp >= 'N' && *bp <= 'Y')
523 tm->TM_GMTOFF = (int)*bp - 'M';
524 #endif
525 #ifdef TM_ZONE
526 tm->TM_ZONE = NULL; /* XXX */
527 #endif
528 bp++;
529 continue;
530 }
531 return NULL;
532 }
533 offs = 0;
534 for (i = 0; i < 4; ) {
535 if (isdigit(*bp)) {
536 offs = offs * 10 + (*bp++ - '0');
537 i++;
538 continue;
539 }
540 if (i == 2 && *bp == ':') {
541 bp++;
542 continue;
543 }
544 break;
545 }
546 switch (i) {
547 case 2:
548 offs *= 100;
549 break;
550 case 4:
551 i = offs % 100;
552 if (i >= 60)
553 return NULL;
554 /* Convert minutes into decimal */
555 offs = (offs / 100) * 100 + (i * 50) / 30;
556 break;
557 default:
558 return NULL;
559 }
560 if (neg)
561 offs = -offs;
562 tm->tm_isdst = 0; /* XXX */
563 #ifdef TM_GMTOFF
564 tm->TM_GMTOFF = offs;
565 #endif
566 #ifdef TM_ZONE
567 tm->TM_ZONE = NULL; /* XXX */
568 #endif
569 continue;
570
571 /*
572 * Miscellaneous conversions.
573 */
574 case 'n': /* Any kind of white-space. */
575 case 't':
576 while (isspace(*bp))
577 bp++;
578 LEGAL_ALT(0);
579 continue;
580
581
582 default: /* Unknown/unsupported conversion. */
583 return NULL;
584 }
585 }
586
587 if (!HAVE_YDAY(state) && HAVE_YEAR(state)) {
588 if (HAVE_MON(state) && HAVE_MDAY(state)) {
589 tm->tm_yday = start_of_month[isleap_sum(tm->tm_year,
590 TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
591 state |= S_YDAY;
592 } else if (day_offset != -1) {
593 /* Set the date to the first Sunday (or Monday)
594 * of the specified week of the year.
595 */
596 if (!HAVE_WDAY(state)) {
597 tm->tm_wday = day_offset;
598 state |= S_WDAY;
599 }
600 tm->tm_yday = (7 -
601 first_wday_of(tm->tm_year + TM_YEAR_BASE) +
602 day_offset) % 7 + (week_offset - 1) * 7 +
603 tm->tm_wday - day_offset;
604 state |= S_YDAY;
605 }
606 }
607
608 if (HAVE_YDAY(state) && HAVE_YEAR(state)) {
609 int isleap;
610 if (!HAVE_MON(state)) {
611 i = 0;
612 isleap = isleap_sum(tm->tm_year, TM_YEAR_BASE);
613 while (tm->tm_yday >= start_of_month[isleap][i])
614 i++;
615 if (i > 12) {
616 i = 1;
617 tm->tm_yday -= start_of_month[isleap][12];
618 tm->tm_year++;
619 }
620 tm->tm_mon = i - 1;
621 state |= S_MON;
622 }
623 if (!HAVE_MDAY(state)) {
624 isleap = isleap_sum(tm->tm_year, TM_YEAR_BASE);
625 tm->tm_mday = tm->tm_yday -
626 start_of_month[isleap][tm->tm_mon] + 1;
627 state |= S_MDAY;
628 }
629 if (!HAVE_WDAY(state)) {
630 i = 0;
631 week_offset = first_wday_of(tm->tm_year);
632 while (i++ <= tm->tm_yday) {
633 if (week_offset++ >= 6)
634 week_offset = 0;
635 }
636 tm->tm_wday = week_offset;
637 state |= S_WDAY;
638 }
639 }
640
641 return __UNCONST(bp);
642 }
643
644
/*
 * conv_num --
 *	Convert the unsigned decimal number at the start of `buf'.
 *
 *	Digits are consumed only while the value can still grow without
 *	exceeding `ulim', and never more digits than `ulim' itself has.
 *	If the result lies in [llim, ulim] it is stored in `*dest' and a
 *	pointer to the first unconsumed character is returned.  Otherwise,
 *	or if `buf' does not start with a digit, NULL is returned and
 *	`*dest' is left untouched.
 */
static const unsigned char *
conv_num(const unsigned char *buf, int *dest, unsigned int llim,
    unsigned int ulim)
{
	const unsigned char *p = buf;
	unsigned int value = 0;
	/* Loses one decimal digit per digit consumed; 0 means "no more". */
	unsigned int digits_left = ulim;

	if (*p < '0' || *p > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (*p - '0');
		digits_left /= 10;
		p++;

		/* Stop when another digit could not possibly fit ... */
		if (value * 10 > ulim || digits_left == 0)
			break;
		/* ... or when the input has no further digit. */
		if (*p < '0' || *p > '9')
			break;
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = value;
	return p;
}
671
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against the `c' strings in
 *	table `n1' and then, if `n2' is not NULL, against the `c' strings
 *	in table `n2' (callers pass full names first, abbreviations
 *	second).  On the first match the index of the matching entry is
 *	stored in `*tgt' and a pointer just past the matched text is
 *	returned.  If nothing matches, NULL is returned and `*tgt' is left
 *	untouched.
 */
static const unsigned char *
find_string(const unsigned char *bp, int *tgt, const char * const *n1,
    const char * const *n2, int c)
{
	const char * const *tables[2];
	const char * const *names;
	size_t name_len;
	int pass, idx;

	tables[0] = n1;
	tables[1] = n2;

	for (pass = 0; pass < 2; pass++) {
		names = tables[pass];
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			name_len = strlen(names[idx]);
			if (strncasecmp(names[idx], (const char *)bp,
			    name_len) != 0)
				continue;

			*tgt = idx;
			return bp + name_len;
		}
	}

	/* Nothing matched */
	return NULL;
}
693