strptime.c revision 1.39 1 /* $NetBSD: strptime.c,v 1.39 2015/04/06 14:38:22 ginsbach Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 * Heavily optimised by David Laight
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.39 2015/04/06 14:38:22 ginsbach Exp $");
35 #endif
36
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <ctype.h>
40 #include <locale.h>
41 #include <string.h>
42 #include <time.h>
43 #include <tzfile.h>
44 #include "private.h"
45 #include "setlocale_local.h"
46
/*
 * When the build environment provides __weak_alias, pair the public
 * names strptime/strptime_l with _strptime/_strptime_l.
 */
#if defined(__weak_alias)
__weak_alias(strptime,_strptime)
__weak_alias(strptime_l, _strptime_l)
#endif
51
/* View the LC_TIME slot of locale `loc' as a pointer to _TimeLocale. */
#define _TIME_LOCALE(loc) ((_TimeLocale *)((loc)->part_impl[(size_t)LC_TIME]))
54
/*
 * We do not implement alternate representations.  However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * ALT_E and ALT_O are the bits recorded in the enclosing function's
 * `alt_format' variable when a "%E" or "%O" modifier has been seen.
 *
 * LEGAL_ALT(x) makes the enclosing function return NULL when `alt_format'
 * contains any modifier bit that is not present in `x'.  It is wrapped in
 * do { } while (0) so that `LEGAL_ALT(x);' is exactly one statement and
 * stays correct inside an unbraced if/else.
 */
#define ALT_E			0x01
#define ALT_O			0x02
#define LEGAL_ALT(x) \
	do { if (alt_format & ~(x)) return NULL; } while (0)
62
/*
 * Names of the zero-offset zones.  These are deliberately writable
 * (non-const) arrays because they are stored into tm->TM_ZONE.
 */
static char gmt[] = "GMT";
static char utc[] = "UTC";

/*
 * RFC-822/RFC-2822 North American zone names: standard time first, then
 * daylight time.  Only the first four entries of each table are searched.
 */
static const char * const nast[5] = {
	"EST",
	"CST",
	"MST",
	"PST",
	"\0\0\0"
};
static const char * const nadt[5] = {
	"EDT",
	"CDT",
	"MDT",
	"PDT",
	"\0\0\0"
};
72
/* Forward declarations of the parsing helpers defined at the end of the file. */
static const u_char *conv_num(const unsigned char *buf, int *dest,
    uint llim, uint ulim);
static const u_char *find_string(const u_char *bp, int *tgt,
    const char * const *n1, const char * const *n2, int c);
76
77 char *
78 strptime(const char *buf, const char *fmt, struct tm *tm)
79 {
80 return strptime_l(buf, fmt, tm, _current_locale());
81 }
82
83 char *
84 strptime_l(const char *buf, const char *fmt, struct tm *tm, locale_t loc)
85 {
86 unsigned char c;
87 const unsigned char *bp, *ep;
88 int alt_format, i, split_year = 0, neg = 0, offs;
89 const char *new_fmt;
90
91 bp = (const u_char *)buf;
92
93 while (bp != NULL && (c = *fmt++) != '\0') {
94 /* Clear `alternate' modifier prior to new conversion. */
95 alt_format = 0;
96 i = 0;
97
98 /* Eat up white-space. */
99 if (isspace(c)) {
100 while (isspace(*bp))
101 bp++;
102 continue;
103 }
104
105 if (c != '%')
106 goto literal;
107
108
109 again: switch (c = *fmt++) {
110 case '%': /* "%%" is converted to "%". */
111 literal:
112 if (c != *bp++)
113 return NULL;
114 LEGAL_ALT(0);
115 continue;
116
117 /*
118 * "Alternative" modifiers. Just set the appropriate flag
119 * and start over again.
120 */
121 case 'E': /* "%E?" alternative conversion modifier. */
122 LEGAL_ALT(0);
123 alt_format |= ALT_E;
124 goto again;
125
126 case 'O': /* "%O?" alternative conversion modifier. */
127 LEGAL_ALT(0);
128 alt_format |= ALT_O;
129 goto again;
130
131 /*
132 * "Complex" conversion rules, implemented through recursion.
133 */
134 case 'c': /* Date and time, using the locale's format. */
135 new_fmt = _TIME_LOCALE(loc)->d_t_fmt;
136 goto recurse;
137
138 case 'D': /* The date as "%m/%d/%y". */
139 new_fmt = "%m/%d/%y";
140 LEGAL_ALT(0);
141 goto recurse;
142
143 case 'F': /* The date as "%Y-%m-%d". */
144 new_fmt = "%Y-%m-%d";
145 LEGAL_ALT(0);
146 goto recurse;
147
148 case 'R': /* The time as "%H:%M". */
149 new_fmt = "%H:%M";
150 LEGAL_ALT(0);
151 goto recurse;
152
153 case 'r': /* The time in 12-hour clock representation. */
154 new_fmt = _TIME_LOCALE(loc)->t_fmt_ampm;
155 LEGAL_ALT(0);
156 goto recurse;
157
158 case 'T': /* The time as "%H:%M:%S". */
159 new_fmt = "%H:%M:%S";
160 LEGAL_ALT(0);
161 goto recurse;
162
163 case 'X': /* The time, using the locale's format. */
164 new_fmt = _TIME_LOCALE(loc)->t_fmt;
165 goto recurse;
166
167 case 'x': /* The date, using the locale's format. */
168 new_fmt = _TIME_LOCALE(loc)->d_fmt;
169 recurse:
170 bp = (const u_char *)strptime((const char *)bp,
171 new_fmt, tm);
172 LEGAL_ALT(ALT_E);
173 continue;
174
175 /*
176 * "Elementary" conversion rules.
177 */
178 case 'A': /* The day of week, using the locale's form. */
179 case 'a':
180 bp = find_string(bp, &tm->tm_wday,
181 _TIME_LOCALE(loc)->day, _TIME_LOCALE(loc)->abday, 7);
182 LEGAL_ALT(0);
183 continue;
184
185 case 'B': /* The month, using the locale's form. */
186 case 'b':
187 case 'h':
188 bp = find_string(bp, &tm->tm_mon,
189 _TIME_LOCALE(loc)->mon, _TIME_LOCALE(loc)->abmon,
190 12);
191 LEGAL_ALT(0);
192 continue;
193
194 case 'C': /* The century number. */
195 i = 20;
196 bp = conv_num(bp, &i, 0, 99);
197
198 i = i * 100 - TM_YEAR_BASE;
199 if (split_year)
200 i += tm->tm_year % 100;
201 split_year = 1;
202 tm->tm_year = i;
203 LEGAL_ALT(ALT_E);
204 continue;
205
206 case 'd': /* The day of month. */
207 case 'e':
208 bp = conv_num(bp, &tm->tm_mday, 1, 31);
209 LEGAL_ALT(ALT_O);
210 continue;
211
212 case 'k': /* The hour (24-hour clock representation). */
213 LEGAL_ALT(0);
214 /* FALLTHROUGH */
215 case 'H':
216 bp = conv_num(bp, &tm->tm_hour, 0, 23);
217 LEGAL_ALT(ALT_O);
218 continue;
219
220 case 'l': /* The hour (12-hour clock representation). */
221 LEGAL_ALT(0);
222 /* FALLTHROUGH */
223 case 'I':
224 bp = conv_num(bp, &tm->tm_hour, 1, 12);
225 if (tm->tm_hour == 12)
226 tm->tm_hour = 0;
227 LEGAL_ALT(ALT_O);
228 continue;
229
230 case 'j': /* The day of year. */
231 i = 1;
232 bp = conv_num(bp, &i, 1, 366);
233 tm->tm_yday = i - 1;
234 LEGAL_ALT(0);
235 continue;
236
237 case 'M': /* The minute. */
238 bp = conv_num(bp, &tm->tm_min, 0, 59);
239 LEGAL_ALT(ALT_O);
240 continue;
241
242 case 'm': /* The month. */
243 i = 1;
244 bp = conv_num(bp, &i, 1, 12);
245 tm->tm_mon = i - 1;
246 LEGAL_ALT(ALT_O);
247 continue;
248
249 case 'p': /* The locale's equivalent of AM/PM. */
250 bp = find_string(bp, &i, _TIME_LOCALE(loc)->am_pm,
251 NULL, 2);
252 if (tm->tm_hour > 11)
253 return NULL;
254 tm->tm_hour += i * 12;
255 LEGAL_ALT(0);
256 continue;
257
258 case 'S': /* The seconds. */
259 bp = conv_num(bp, &tm->tm_sec, 0, 61);
260 LEGAL_ALT(ALT_O);
261 continue;
262
263 #ifndef TIME_MAX
264 #define TIME_MAX INT64_MAX
265 #endif
266 case 's': /* seconds since the epoch */
267 {
268 time_t sse = 0;
269 uint64_t rulim = TIME_MAX;
270
271 if (*bp < '0' || *bp > '9') {
272 bp = NULL;
273 continue;
274 }
275
276 do {
277 sse *= 10;
278 sse += *bp++ - '0';
279 rulim /= 10;
280 } while ((sse * 10 <= TIME_MAX) &&
281 rulim && *bp >= '0' && *bp <= '9');
282
283 if (sse < 0 || (uint64_t)sse > TIME_MAX) {
284 bp = NULL;
285 continue;
286 }
287
288 if (localtime_r(&sse, tm) == NULL)
289 bp = NULL;
290 }
291 continue;
292
293 case 'U': /* The week of year, beginning on sunday. */
294 case 'W': /* The week of year, beginning on monday. */
295 /*
296 * XXX This is bogus, as we can not assume any valid
297 * information present in the tm structure at this
298 * point to calculate a real value, so just check the
299 * range for now.
300 */
301 bp = conv_num(bp, &i, 0, 53);
302 LEGAL_ALT(ALT_O);
303 continue;
304
305 case 'w': /* The day of week, beginning on sunday. */
306 bp = conv_num(bp, &tm->tm_wday, 0, 6);
307 LEGAL_ALT(ALT_O);
308 continue;
309
310 case 'u': /* The day of week, monday = 1. */
311 bp = conv_num(bp, &i, 1, 7);
312 tm->tm_wday = i % 7;
313 LEGAL_ALT(ALT_O);
314 continue;
315
316 case 'g': /* The year corresponding to the ISO week
317 * number but without the century.
318 */
319 bp = conv_num(bp, &i, 0, 99);
320 continue;
321
322 case 'G': /* The year corresponding to the ISO week
323 * number with century.
324 */
325 do
326 bp++;
327 while (isdigit(*bp));
328 continue;
329
330 case 'V': /* The ISO 8601:1988 week number as decimal */
331 bp = conv_num(bp, &i, 0, 53);
332 continue;
333
334 case 'Y': /* The year. */
335 i = TM_YEAR_BASE; /* just for data sanity... */
336 bp = conv_num(bp, &i, 0, 9999);
337 tm->tm_year = i - TM_YEAR_BASE;
338 LEGAL_ALT(ALT_E);
339 continue;
340
341 case 'y': /* The year within 100 years of the epoch. */
342 /* LEGAL_ALT(ALT_E | ALT_O); */
343 bp = conv_num(bp, &i, 0, 99);
344
345 if (split_year)
346 /* preserve century */
347 i += (tm->tm_year / 100) * 100;
348 else {
349 split_year = 1;
350 if (i <= 68)
351 i = i + 2000 - TM_YEAR_BASE;
352 else
353 i = i + 1900 - TM_YEAR_BASE;
354 }
355 tm->tm_year = i;
356 continue;
357
358 case 'Z':
359 tzset();
360 if (strncmp((const char *)bp, gmt, 3) == 0 ||
361 strncmp((const char *)bp, utc, 3) == 0) {
362 tm->tm_isdst = 0;
363 #ifdef TM_GMTOFF
364 tm->TM_GMTOFF = 0;
365 #endif
366 #ifdef TM_ZONE
367 tm->TM_ZONE = gmt;
368 #endif
369 bp += 3;
370 } else {
371 ep = find_string(bp, &i,
372 (const char * const *)tzname,
373 NULL, 2);
374 if (ep != NULL) {
375 tm->tm_isdst = i;
376 #ifdef TM_GMTOFF
377 tm->TM_GMTOFF = -(timezone);
378 #endif
379 #ifdef TM_ZONE
380 tm->TM_ZONE = tzname[i];
381 #endif
382 }
383 bp = ep;
384 }
385 continue;
386
387 case 'z':
388 /*
389 * We recognize all ISO 8601 formats:
390 * Z = Zulu time/UTC
391 * [+-]hhmm
392 * [+-]hh:mm
393 * [+-]hh
394 * We recognize all RFC-822/RFC-2822 formats:
395 * UT|GMT
396 * North American : UTC offsets
397 * E[DS]T = Eastern : -4 | -5
398 * C[DS]T = Central : -5 | -6
399 * M[DS]T = Mountain: -6 | -7
400 * P[DS]T = Pacific : -7 | -8
401 * Military
402 * [A-IL-M] = -1 ... -9 (J not used)
403 * [N-Y] = +1 ... +12
404 */
405 while (isspace(*bp))
406 bp++;
407
408 switch (*bp++) {
409 case 'G':
410 if (*bp++ != 'M')
411 return NULL;
412 /*FALLTHROUGH*/
413 case 'U':
414 if (*bp++ != 'T')
415 return NULL;
416 /*FALLTHROUGH*/
417 case 'Z':
418 tm->tm_isdst = 0;
419 #ifdef TM_GMTOFF
420 tm->TM_GMTOFF = 0;
421 #endif
422 #ifdef TM_ZONE
423 tm->TM_ZONE = utc;
424 #endif
425 continue;
426 case '+':
427 neg = 0;
428 break;
429 case '-':
430 neg = 1;
431 break;
432 default:
433 --bp;
434 ep = find_string(bp, &i, nast, NULL, 4);
435 if (ep != NULL) {
436 #ifdef TM_GMTOFF
437 tm->TM_GMTOFF = -5 - i;
438 #endif
439 #ifdef TM_ZONE
440 tm->TM_ZONE = __UNCONST(nast[i]);
441 #endif
442 bp = ep;
443 continue;
444 }
445 ep = find_string(bp, &i, nadt, NULL, 4);
446 if (ep != NULL) {
447 tm->tm_isdst = 1;
448 #ifdef TM_GMTOFF
449 tm->TM_GMTOFF = -4 - i;
450 #endif
451 #ifdef TM_ZONE
452 tm->TM_ZONE = __UNCONST(nadt[i]);
453 #endif
454 bp = ep;
455 continue;
456 }
457
458 if ((*bp >= 'A' && *bp <= 'I') ||
459 (*bp >= 'L' && *bp <= 'Y')) {
460 #ifdef TM_GMTOFF
461 /* Argh! No 'J'! */
462 if (*bp >= 'A' && *bp <= 'I')
463 tm->TM_GMTOFF =
464 ('A' - 1) - (int)*bp;
465 else if (*bp >= 'L' && *bp <= 'M')
466 tm->TM_GMTOFF = 'A' - (int)*bp;
467 else if (*bp >= 'N' && *bp <= 'Y')
468 tm->TM_GMTOFF = (int)*bp - 'M';
469 #endif
470 #ifdef TM_ZONE
471 tm->TM_ZONE = NULL; /* XXX */
472 #endif
473 bp++;
474 continue;
475 }
476 return NULL;
477 }
478 offs = 0;
479 for (i = 0; i < 4; ) {
480 if (isdigit(*bp)) {
481 offs = offs * 10 + (*bp++ - '0');
482 i++;
483 continue;
484 }
485 if (i == 2 && *bp == ':') {
486 bp++;
487 continue;
488 }
489 break;
490 }
491 switch (i) {
492 case 2:
493 offs *= 100;
494 break;
495 case 4:
496 i = offs % 100;
497 if (i >= 60)
498 return NULL;
499 /* Convert minutes into decimal */
500 offs = (offs / 100) * 100 + (i * 50) / 30;
501 break;
502 default:
503 return NULL;
504 }
505 if (neg)
506 offs = -offs;
507 tm->tm_isdst = 0; /* XXX */
508 #ifdef TM_GMTOFF
509 tm->TM_GMTOFF = offs;
510 #endif
511 #ifdef TM_ZONE
512 tm->TM_ZONE = NULL; /* XXX */
513 #endif
514 continue;
515
516 /*
517 * Miscellaneous conversions.
518 */
519 case 'n': /* Any kind of white-space. */
520 case 't':
521 while (isspace(*bp))
522 bp++;
523 LEGAL_ALT(0);
524 continue;
525
526
527 default: /* Unknown/unsupported conversion. */
528 return NULL;
529 }
530 }
531
532 return __UNCONST(bp);
533 }
534
535
/*
 * conv_num --
 *	Read an unsigned decimal number from the start of `buf'.
 *
 *	`ulim' bounds both the value and the number of digits consumed:
 *	at most as many digits as `ulim' has are read, and reading stops
 *	early once appending any further digit would exceed `ulim'.
 *
 *	On success the value is stored in `*dest' and a pointer to the
 *	first unconsumed character is returned.  NULL is returned, and
 *	`*dest' left untouched, if `buf' does not start with a digit or the
 *	value lies outside [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cursor = buf;
	uint value = 0;
	/* Loses one decimal digit per digit read; 0 means "no more". */
	uint digit_budget = ulim;

	if (*cursor < '0' || *cursor > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (uint)(*cursor - '0');
		digit_budget /= 10;
		cursor++;

		if (value * 10 > ulim || digit_budget == 0)
			break;
		if (*cursor < '0' || *cursor > '9')
			break;
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = (int)value;
	return cursor;
}
562
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against the first `c'
 *	entries of the name table `n1' and then, if nothing matched and
 *	`n2' is not NULL, against the first `c' entries of `n2'.
 *
 *	The first entry that is a prefix of `bp' wins: its index is stored
 *	in `*tgt' and a pointer just past the matched text is returned.
 *	NULL is returned, and `*tgt' left untouched, when no entry matches
 *	or `n1' is NULL.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
		const char * const *n2, int c)
{
	const char * const *tables[2];
	int pass, idx;

	/* check full name - then abbreviated ones */
	tables[0] = n1;
	tables[1] = n2;

	for (pass = 0; pass < 2; pass++) {
		const char * const *names = tables[pass];

		/* A missing table ends the search; later ones are not tried. */
		if (names == NULL)
			break;

		for (idx = 0; idx < c; idx++) {
			const char *name = names[idx];
			size_t len = strlen(name);

			if (strncasecmp(name, (const char *)bp, len) != 0)
				continue;

			*tgt = idx;
			return bp + len;
		}
	}

	/* Nothing matched */
	return NULL;
}
584