strptime.c revision 1.40 1 /* $NetBSD: strptime.c,v 1.40 2015/07/03 13:06:54 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 * Heavily optimised by David Laight
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.40 2015/07/03 13:06:54 christos Exp $");
35 #endif
36
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <sys/types.h>
40 #include <sys/clock.h>
41 #include <ctype.h>
42 #include <locale.h>
43 #include <string.h>
44 #include <time.h>
45 #include <tzfile.h>
46 #include "private.h"
47 #include "setlocale_local.h"
48
#ifdef __weak_alias
__weak_alias(strptime,_strptime)
__weak_alias(strptime_l, _strptime_l)
#endif

/* Fetch the LC_TIME part of a locale object as a _TimeLocale pointer. */
#define _TIME_LOCALE(loc) \
	((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME]))

/*
 * Alternate representations (the %E and %O modifiers) are not
 * implemented.  The modifiers are still tracked so that a conversion
 * can reject a modifier that is not permitted for it: LEGAL_ALT(x)
 * makes the enclosing function return NULL when `alt_format' contains
 * any modifier bit that is not part of `x'.
 */
#define ALT_E		(1 << 0)
#define ALT_O		(1 << 1)
#define LEGAL_ALT(x)	do { if (alt_format & ~(x)) return NULL; } while (0)

/* Bits recording which date fields of struct tm have been filled in. */
#define FLAG_YEAR	0x01
#define FLAG_MONTH	0x02
#define FLAG_YDAY	0x04
#define FLAG_MDAY	0x08
#define FLAG_WDAY	0x10

/* Zone names handed out through tm_zone; deliberately not const. */
static char gmt[] = "GMT";
static char utc[] = "UTC";

/* RFC-822/RFC-2822 North American zones: standard time, then daylight. */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};
static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};

static const u_char *conv_num(const unsigned char *, int *, uint, uint);
static const u_char *find_string(const u_char *, int *,
    const char * const *, const char * const *, int);

/*
 * Zero-based day of the year on which each month starts; the first row
 * is for common years and the second for leap years.  The 13th entry
 * of each row is the length of the year.
 */
static const int start_of_month[2][13] = {
	/* common year */
	{ 0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 },
	/* leap year */
	{ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
};
89
/*
 * first_wday_of --
 *	Return the day of the week (0 = Sunday ... 6 = Saturday) on which
 *	January 1st of the year `yr' falls.  `yr' is the full calendar
 *	year (e.g. 2015), not an offset from TM_YEAR_BASE.
 *
 *	Valid for the Gregorian calendar, which began Sept 14, 1752 in
 *	the UK and its colonies.  Ref:
 *	http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */
static int
first_wday_of(int yr)
{
	const int century = yr / 100;
	const int year_in_century = yr % 100;
	int total;

	total = 2 * (3 - century % 4);		/* century contribution */
	total += year_in_century;
	total += year_in_century / 4;
	if (is_leap_year(yr))
		total += 6;			/* leap year correction */
	total += 1;

	return total % 7;
}
103
104 char *
105 strptime(const char *buf, const char *fmt, struct tm *tm)
106 {
107 return strptime_l(buf, fmt, tm, _current_locale());
108 }
109
110 char *
111 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc)
112 {
113 unsigned char c;
114 const unsigned char *bp, *ep;
115 int alt_format, i, split_year = 0, neg = 0, flags = 0,
116 day_offset = -1, week_offset = 0, offs;
117 const char *new_fmt;
118
119 bp = (const u_char *)buf;
120
121 while (bp != NULL && (c = *fmt++) != '\0') {
122 /* Clear `alternate' modifier prior to new conversion. */
123 alt_format = 0;
124 i = 0;
125
126 /* Eat up white-space. */
127 if (isspace(c)) {
128 while (isspace(*bp))
129 bp++;
130 continue;
131 }
132
133 if (c != '%')
134 goto literal;
135
136
137 again: switch (c = *fmt++) {
138 case '%': /* "%%" is converted to "%". */
139 literal:
140 if (c != *bp++)
141 return NULL;
142 LEGAL_ALT(0);
143 continue;
144
145 /*
146 * "Alternative" modifiers. Just set the appropriate flag
147 * and start over again.
148 */
149 case 'E': /* "%E?" alternative conversion modifier. */
150 LEGAL_ALT(0);
151 alt_format |= ALT_E;
152 goto again;
153
154 case 'O': /* "%O?" alternative conversion modifier. */
155 LEGAL_ALT(0);
156 alt_format |= ALT_O;
157 goto again;
158
159 /*
160 * "Complex" conversion rules, implemented through recursion.
161 */
162 case 'c': /* Date and time, using the locale's format. */
163 new_fmt = _TIME_LOCALE(loc)->d_t_fmt;
164 flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY |
165 FLAG_YEAR;
166 goto recurse;
167
168 case 'D': /* The date as "%m/%d/%y". */
169 new_fmt = "%m/%d/%y";
170 LEGAL_ALT(0);
171 flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
172 goto recurse;
173
174 case 'F': /* The date as "%Y-%m-%d". */
175 new_fmt = "%Y-%m-%d";
176 LEGAL_ALT(0);
177 flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
178 goto recurse;
179
180 case 'R': /* The time as "%H:%M". */
181 new_fmt = "%H:%M";
182 LEGAL_ALT(0);
183 goto recurse;
184
185 case 'r': /* The time in 12-hour clock representation. */
186 new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm;
187 LEGAL_ALT(0);
188 goto recurse;
189
190 case 'T': /* The time as "%H:%M:%S". */
191 new_fmt = "%H:%M:%S";
192 LEGAL_ALT(0);
193 goto recurse;
194
195 case 'X': /* The time, using the locale's format. */
196 new_fmt = _TIME_LOCALE(loc)->t_fmt;
197 goto recurse;
198
199 case 'x': /* The date, using the locale's format. */
200 new_fmt = _TIME_LOCALE(loc)->d_fmt;
201 flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
202 recurse:
203 bp = (const u_char *)strptime((const char *)bp,
204 new_fmt, tm);
205 LEGAL_ALT(ALT_E);
206 continue;
207
208 /*
209 * "Elementary" conversion rules.
210 */
211 case 'A': /* The day of week, using the locale's form. */
212 case 'a':
213 bp = find_string(bp, &tm->tm_wday,
214 _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7);
215 LEGAL_ALT(0);
216 flags |= FLAG_WDAY;
217 continue;
218
219 case 'B': /* The month, using the locale's form. */
220 case 'b':
221 case 'h':
222 bp = find_string(bp, &tm->tm_mon,
223 _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon,
224 12);
225 LEGAL_ALT(0);
226 flags |= FLAG_MONTH;
227 continue;
228
229 case 'C': /* The century number. */
230 i = 20;
231 bp = conv_num(bp, &i, 0, 99);
232
233 i = i * 100 - TM_YEAR_BASE;
234 if (split_year)
235 i += tm->tm_year % 100;
236 split_year = 1;
237 tm->tm_year = i;
238 LEGAL_ALT(ALT_E);
239 flags |= FLAG_YEAR;
240 continue;
241
242 case 'd': /* The day of month. */
243 case 'e':
244 bp = conv_num(bp, &tm->tm_mday, 1, 31);
245 LEGAL_ALT(ALT_O);
246 flags |= FLAG_MDAY;
247 continue;
248
249 case 'k': /* The hour (24-hour clock representation). */
250 LEGAL_ALT(0);
251 /* FALLTHROUGH */
252 case 'H':
253 bp = conv_num(bp, &tm->tm_hour, 0, 23);
254 LEGAL_ALT(ALT_O);
255 continue;
256
257 case 'l': /* The hour (12-hour clock representation). */
258 LEGAL_ALT(0);
259 /* FALLTHROUGH */
260 case 'I':
261 bp = conv_num(bp, &tm->tm_hour, 1, 12);
262 if (tm->tm_hour == 12)
263 tm->tm_hour = 0;
264 LEGAL_ALT(ALT_O);
265 continue;
266
267 case 'j': /* The day of year. */
268 i = 1;
269 bp = conv_num(bp, &i, 1, 366);
270 tm->tm_yday = i - 1;
271 LEGAL_ALT(0);
272 flags |= FLAG_YDAY;
273 continue;
274
275 case 'M': /* The minute. */
276 bp = conv_num(bp, &tm->tm_min, 0, 59);
277 LEGAL_ALT(ALT_O);
278 continue;
279
280 case 'm': /* The month. */
281 i = 1;
282 bp = conv_num(bp, &i, 1, 12);
283 tm->tm_mon = i - 1;
284 LEGAL_ALT(ALT_O);
285 flags |= FLAG_MONTH;
286 continue;
287
288 case 'p': /* The locale's equivalent of AM/PM. */
289 bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm,
290 NULL, 2);
291 if (tm->tm_hour > 11)
292 return NULL;
293 tm->tm_hour += i * 12;
294 LEGAL_ALT(0);
295 continue;
296
297 case 'S': /* The seconds. */
298 bp = conv_num(bp, &tm->tm_sec, 0, 61);
299 LEGAL_ALT(ALT_O);
300 continue;
301
302 #ifndef TIME_MAX
303 #define TIME_MAX INT64_MAX
304 #endif
305 case 's': /* seconds since the epoch */
306 {
307 time_t sse = 0;
308 uint64_t rulim = TIME_MAX;
309
310 if (*bp < '0' || *bp > '9') {
311 bp = NULL;
312 continue;
313 }
314
315 do {
316 sse *= 10;
317 sse += *bp++ - '0';
318 rulim /= 10;
319 } while ((sse * 10 <= TIME_MAX) &&
320 rulim && *bp >= '0' && *bp <= '9');
321
322 if (sse < 0 || (uint64_t)sse > TIME_MAX) {
323 bp = NULL;
324 continue;
325 }
326
327 if (localtime_r(&sse, tm) == NULL)
328 bp = NULL;
329 else
330 flags |= FLAG_YDAY | FLAG_WDAY |
331 FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
332 }
333 continue;
334
335 case 'U': /* The week of year, beginning on sunday. */
336 case 'W': /* The week of year, beginning on monday. */
337 /*
338 * XXX This is bogus, as we can not assume any valid
339 * information present in the tm structure at this
340 * point to calculate a real value, so just check the
341 * range for now.
342 */
343 bp = conv_num(bp, &i, 0, 53);
344 LEGAL_ALT(ALT_O);
345 if (c == 'U')
346 day_offset = TM_SUNDAY;
347 else
348 day_offset = TM_MONDAY;
349 week_offset = i;
350 continue;
351
352 case 'w': /* The day of week, beginning on sunday. */
353 bp = conv_num(bp, &tm->tm_wday, 0, 6);
354 LEGAL_ALT(ALT_O);
355 flags |= FLAG_WDAY;
356 continue;
357
358 case 'u': /* The day of week, monday = 1. */
359 bp = conv_num(bp, &i, 1, 7);
360 tm->tm_wday = i % 7;
361 LEGAL_ALT(ALT_O);
362 continue;
363
364 case 'g': /* The year corresponding to the ISO week
365 * number but without the century.
366 */
367 bp = conv_num(bp, &i, 0, 99);
368 continue;
369
370 case 'G': /* The year corresponding to the ISO week
371 * number with century.
372 */
373 do
374 bp++;
375 while (isdigit(*bp));
376 continue;
377
378 case 'V': /* The ISO 8601:1988 week number as decimal */
379 bp = conv_num(bp, &i, 0, 53);
380 continue;
381
382 case 'Y': /* The year. */
383 i = TM_YEAR_BASE; /* just for data sanity... */
384 bp = conv_num(bp, &i, 0, 9999);
385 tm->tm_year = i - TM_YEAR_BASE;
386 LEGAL_ALT(ALT_E);
387 flags |= FLAG_YEAR;
388 continue;
389
390 case 'y': /* The year within 100 years of the epoch. */
391 /* LEGAL_ALT(ALT_E | ALT_O); */
392 bp = conv_num(bp, &i, 0, 99);
393
394 if (split_year)
395 /* preserve century */
396 i += (tm->tm_year / 100) * 100;
397 else {
398 split_year = 1;
399 if (i <= 68)
400 i = i + 2000 - TM_YEAR_BASE;
401 else
402 i = i + 1900 - TM_YEAR_BASE;
403 }
404 tm->tm_year = i;
405 flags |= FLAG_YEAR;
406 continue;
407
408 case 'Z':
409 tzset();
410 if (strncmp((const char *)bp, gmt, 3) == 0 ||
411 strncmp((const char *)bp, utc, 3) == 0) {
412 tm->tm_isdst = 0;
413 #ifdef TM_GMTOFF
414 tm->TM_GMTOFF = 0;
415 #endif
416 #ifdef TM_ZONE
417 tm->TM_ZONE = gmt;
418 #endif
419 bp += 3;
420 } else {
421 ep = find_string(bp, &i,
422 (const char * const *)tzname,
423 NULL, 2);
424 if (ep != NULL) {
425 tm->tm_isdst = i;
426 #ifdef TM_GMTOFF
427 tm->TM_GMTOFF = -(timezone);
428 #endif
429 #ifdef TM_ZONE
430 tm->TM_ZONE = tzname[i];
431 #endif
432 }
433 bp = ep;
434 }
435 continue;
436
437 case 'z':
438 /*
439 * We recognize all ISO 8601 formats:
440 * Z = Zulu time/UTC
441 * [+-]hhmm
442 * [+-]hh:mm
443 * [+-]hh
444 * We recognize all RFC-822/RFC-2822 formats:
445 * UT|GMT
446 * North American : UTC offsets
447 * E[DS]T = Eastern : -4 | -5
448 * C[DS]T = Central : -5 | -6
449 * M[DS]T = Mountain: -6 | -7
450 * P[DS]T = Pacific : -7 | -8
451 * Military
452 * [A-IL-M] = -1 ... -9 (J not used)
453 * [N-Y] = +1 ... +12
454 */
455 while (isspace(*bp))
456 bp++;
457
458 switch (*bp++) {
459 case 'G':
460 if (*bp++ != 'M')
461 return NULL;
462 /*FALLTHROUGH*/
463 case 'U':
464 if (*bp++ != 'T')
465 return NULL;
466 /*FALLTHROUGH*/
467 case 'Z':
468 tm->tm_isdst = 0;
469 #ifdef TM_GMTOFF
470 tm->TM_GMTOFF = 0;
471 #endif
472 #ifdef TM_ZONE
473 tm->TM_ZONE = utc;
474 #endif
475 continue;
476 case '+':
477 neg = 0;
478 flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY |
479 FLAG_YEAR;
480 break;
481 case '-':
482 neg = 1;
483 break;
484 default:
485 --bp;
486 ep = find_string(bp, &i, nast, NULL, 4);
487 if (ep != NULL) {
488 #ifdef TM_GMTOFF
489 tm->TM_GMTOFF = -5 - i;
490 #endif
491 #ifdef TM_ZONE
492 tm->TM_ZONE = __UNCONST(nast[i]);
493 #endif
494 bp = ep;
495 continue;
496 }
497 ep = find_string(bp, &i, nadt, NULL, 4);
498 if (ep != NULL) {
499 tm->tm_isdst = 1;
500 #ifdef TM_GMTOFF
501 tm->TM_GMTOFF = -4 - i;
502 #endif
503 #ifdef TM_ZONE
504 tm->TM_ZONE = __UNCONST(nadt[i]);
505 #endif
506 bp = ep;
507 continue;
508 }
509
510 if ((*bp >= 'A' && *bp <= 'I') ||
511 (*bp >= 'L' && *bp <= 'Y')) {
512 #ifdef TM_GMTOFF
513 /* Argh! No 'J'! */
514 if (*bp >= 'A' && *bp <= 'I')
515 tm->TM_GMTOFF =
516 ('A' - 1) - (int)*bp;
517 else if (*bp >= 'L' && *bp <= 'M')
518 tm->TM_GMTOFF = 'A' - (int)*bp;
519 else if (*bp >= 'N' && *bp <= 'Y')
520 tm->TM_GMTOFF = (int)*bp - 'M';
521 #endif
522 #ifdef TM_ZONE
523 tm->TM_ZONE = NULL; /* XXX */
524 #endif
525 bp++;
526 continue;
527 }
528 return NULL;
529 }
530 offs = 0;
531 for (i = 0; i < 4; ) {
532 if (isdigit(*bp)) {
533 offs = offs * 10 + (*bp++ - '0');
534 i++;
535 continue;
536 }
537 if (i == 2 && *bp == ':') {
538 bp++;
539 continue;
540 }
541 break;
542 }
543 switch (i) {
544 case 2:
545 offs *= 100;
546 break;
547 case 4:
548 i = offs % 100;
549 if (i >= 60)
550 return NULL;
551 /* Convert minutes into decimal */
552 offs = (offs / 100) * 100 + (i * 50) / 30;
553 break;
554 default:
555 return NULL;
556 }
557 if (neg)
558 offs = -offs;
559 tm->tm_isdst = 0; /* XXX */
560 #ifdef TM_GMTOFF
561 tm->TM_GMTOFF = offs;
562 #endif
563 #ifdef TM_ZONE
564 tm->TM_ZONE = NULL; /* XXX */
565 #endif
566 continue;
567
568 /*
569 * Miscellaneous conversions.
570 */
571 case 'n': /* Any kind of white-space. */
572 case 't':
573 while (isspace(*bp))
574 bp++;
575 LEGAL_ALT(0);
576 continue;
577
578
579 default: /* Unknown/unsupported conversion. */
580 return NULL;
581 }
582 }
583
584 if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) {
585 if ((flags & (FLAG_MONTH | FLAG_MDAY)) ==
586 (FLAG_MONTH | FLAG_MDAY)) {
587 tm->tm_yday = start_of_month[is_leap_year(tm->tm_year +
588 TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
589 flags |= FLAG_YDAY;
590 } else if (day_offset != -1) {
591 /* Set the date to the first Sunday (or Monday)
592 * of the specified week of the year.
593 */
594 if (!(flags & FLAG_WDAY)) {
595 tm->tm_wday = day_offset;
596 flags |= FLAG_WDAY;
597 }
598 tm->tm_yday = (7 -
599 first_wday_of(tm->tm_year + TM_YEAR_BASE) +
600 day_offset) % 7 + (week_offset - 1) * 7 +
601 tm->tm_wday - day_offset;
602 flags |= FLAG_YDAY;
603 }
604 }
605
606 if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) {
607 int isleap;
608 if (!(flags & FLAG_MONTH)) {
609 i = 0;
610 isleap = is_leap_year(tm->tm_year + TM_YEAR_BASE);
611 while (tm->tm_yday >= start_of_month[isleap][i])
612 i++;
613 if (i > 12) {
614 i = 1;
615 tm->tm_yday -= start_of_month[isleap][12];
616 tm->tm_year++;
617 }
618 tm->tm_mon = i - 1;
619 flags |= FLAG_MONTH;
620 }
621 if (!(flags & FLAG_MDAY)) {
622 isleap = is_leap_year(tm->tm_year + TM_YEAR_BASE);
623 tm->tm_mday = tm->tm_yday -
624 start_of_month[isleap][tm->tm_mon] + 1;
625 flags |= FLAG_MDAY;
626 }
627 if (!(flags & FLAG_WDAY)) {
628 i = 0;
629 week_offset = first_wday_of(tm->tm_year);
630 while (i++ <= tm->tm_yday) {
631 if (week_offset++ >= 6)
632 week_offset = 0;
633 }
634 tm->tm_wday = week_offset;
635 flags |= FLAG_WDAY;
636 }
637 }
638
639 return __UNCONST(bp);
640 }
641
642
/*
 * conv_num --
 *	Convert the unsigned decimal number at the start of `buf'.
 *
 *	`ulim' limits both the value and the number of digits that are
 *	consumed: scanning stops once another digit would necessarily
 *	push the value past `ulim', or once as many digits as `ulim'
 *	itself has have been read.
 *
 *	On success the value is stored through `dest' and a pointer to
 *	the first unconsumed character is returned.  NULL is returned,
 *	and `dest' is left alone, when `buf' does not start with a digit
 *	or the value lies outside [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cur = buf;
	uint value = 0;
	uint budget = ulim;	/* loses one decimal place per digit read */

	if (*cur < '0' || *cur > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (uint)(*cur - '0');
		cur++;
		budget /= 10;

		if (value * 10 > ulim || budget == 0)
			break;
		if (*cur < '0' || *cur > '9')
			break;
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = value;
	return cur;
}
669
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against a table of names.
 *
 *	The `c' entries of `n1' (the full names) are tried first, followed
 *	by the `c' entries of `n2' (the abbreviations) unless `n2' is
 *	NULL.  The first entry that is a prefix of `bp' wins: its index is
 *	stored through `tgt' and a pointer just past the matched text is
 *	returned.  NULL is returned, and `tgt' is left alone, when no
 *	entry matches.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
    const char * const *n2, int c)
{
	const char * const *tables[2];
	const char * const *names;
	const char *input = (const char *)bp;
	size_t namelen;
	int pass, idx;

	tables[0] = n1;
	tables[1] = n2;

	/* A NULL table ends the search, exactly as a missing `n2' does. */
	for (pass = 0; pass < 2 && tables[pass] != NULL; pass++) {
		names = tables[pass];
		for (idx = 0; idx < c; idx++) {
			namelen = strlen(names[idx]);
			if (strncasecmp(names[idx], input, namelen) != 0)
				continue;
			*tgt = idx;
			return bp + namelen;
		}
	}

	/* Nothing matched */
	return NULL;
}
691