strptime.c revision 1.30 1 /* $NetBSD: strptime.c,v 1.30 2008/11/04 20:17:56 christos Exp $ */
2
3 /*-
4 * Copyright (c) 1997, 1998, 2005, 2008 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code was contributed to The NetBSD Foundation by Klaus Klein.
8 * Heavily optimised by David Laight
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32 #include <sys/cdefs.h>
33 #if defined(LIBC_SCCS) && !defined(lint)
34 __RCSID("$NetBSD: strptime.c,v 1.30 2008/11/04 20:17:56 christos Exp $");
35 #endif
36
37 #include "namespace.h"
38 #include <sys/localedef.h>
39 #include <ctype.h>
40 #include <locale.h>
41 #include <string.h>
42 #include <time.h>
43 #include <tzfile.h>
44 #include "private.h"
45
46 #ifdef __weak_alias
47 __weak_alias(strptime,_strptime)
48 #endif
49
/* Shorthand for reading member `x' of the current time locale. */
#define	_ctloc(x)		(_CurrentTimeLocale->x)

/*
 * We do not implement alternate representations. However, we always
 * check whether a given modifier is allowed for a certain conversion.
 *
 * ALT_E and ALT_O are the flag bits recorded in the caller's local
 * `alt_format' when a "%E" or "%O" modifier has been seen.
 *
 * LEGAL_ALT(x) makes the enclosing function return NULL when `alt_format'
 * contains any bit that is not part of the mask `x'.  It is wrapped in
 * do { } while (0) so that "LEGAL_ALT(x);" is exactly one statement and
 * remains safe inside an unbraced if/else.
 */
#define	ALT_E			0x01
#define	ALT_O			0x02
#define	LEGAL_ALT(x)		do { if (alt_format & ~(x)) return NULL; } while (0)

/*
 * Zone names handed out through tm->TM_ZONE.  They are deliberately not
 * const-qualified because they are assigned to that member directly.
 */
static char gmt[] = { "GMT" };
static char utc[] = { "UTC" };

/* Helpers defined at the end of this file. */
static const u_char *conv_num(const unsigned char *, int *, uint, uint);
static const u_char *find_string(const u_char *, int *, const char * const *,
	const char * const *, int);
66
67
68 char *
69 strptime(const char *buf, const char *fmt, struct tm *tm)
70 {
71 unsigned char c;
72 const unsigned char *bp;
73 int alt_format, i, split_year = 0, neg, offs;
74 const char *new_fmt;
75
76 bp = (const u_char *)buf;
77
78 while (bp != NULL && (c = *fmt++) != '\0') {
79 /* Clear `alternate' modifier prior to new conversion. */
80 alt_format = 0;
81 i = 0;
82
83 /* Eat up white-space. */
84 if (isspace(c)) {
85 while (isspace(*bp))
86 bp++;
87 continue;
88 }
89
90 if (c != '%')
91 goto literal;
92
93
94 again: switch (c = *fmt++) {
95 case '%': /* "%%" is converted to "%". */
96 literal:
97 if (c != *bp++)
98 return NULL;
99 LEGAL_ALT(0);
100 continue;
101
102 /*
103 * "Alternative" modifiers. Just set the appropriate flag
104 * and start over again.
105 */
106 case 'E': /* "%E?" alternative conversion modifier. */
107 LEGAL_ALT(0);
108 alt_format |= ALT_E;
109 goto again;
110
111 case 'O': /* "%O?" alternative conversion modifier. */
112 LEGAL_ALT(0);
113 alt_format |= ALT_O;
114 goto again;
115
116 /*
117 * "Complex" conversion rules, implemented through recursion.
118 */
119 case 'c': /* Date and time, using the locale's format. */
120 new_fmt = _ctloc(d_t_fmt);
121 goto recurse;
122
123 case 'D': /* The date as "%m/%d/%y". */
124 new_fmt = "%m/%d/%y";
125 LEGAL_ALT(0);
126 goto recurse;
127
128 case 'F': /* The date as "%Y-%m-%d". */
129 new_fmt = "%Y-%m-%d";
130 LEGAL_ALT(0);
131 goto recurse;
132
133 case 'R': /* The time as "%H:%M". */
134 new_fmt = "%H:%M";
135 LEGAL_ALT(0);
136 goto recurse;
137
138 case 'r': /* The time in 12-hour clock representation. */
139 new_fmt =_ctloc(t_fmt_ampm);
140 LEGAL_ALT(0);
141 goto recurse;
142
143 case 'T': /* The time as "%H:%M:%S". */
144 new_fmt = "%H:%M:%S";
145 LEGAL_ALT(0);
146 goto recurse;
147
148 case 'X': /* The time, using the locale's format. */
149 new_fmt =_ctloc(t_fmt);
150 goto recurse;
151
152 case 'x': /* The date, using the locale's format. */
153 new_fmt =_ctloc(d_fmt);
154 recurse:
155 bp = (const u_char *)strptime((const char *)bp,
156 new_fmt, tm);
157 LEGAL_ALT(ALT_E);
158 continue;
159
160 /*
161 * "Elementary" conversion rules.
162 */
163 case 'A': /* The day of week, using the locale's form. */
164 case 'a':
165 bp = find_string(bp, &tm->tm_wday, _ctloc(day),
166 _ctloc(abday), 7);
167 LEGAL_ALT(0);
168 continue;
169
170 case 'B': /* The month, using the locale's form. */
171 case 'b':
172 case 'h':
173 bp = find_string(bp, &tm->tm_mon, _ctloc(mon),
174 _ctloc(abmon), 12);
175 LEGAL_ALT(0);
176 continue;
177
178 case 'C': /* The century number. */
179 i = 20;
180 bp = conv_num(bp, &i, 0, 99);
181
182 i = i * 100 - TM_YEAR_BASE;
183 if (split_year)
184 i += tm->tm_year % 100;
185 split_year = 1;
186 tm->tm_year = i;
187 LEGAL_ALT(ALT_E);
188 continue;
189
190 case 'd': /* The day of month. */
191 case 'e':
192 bp = conv_num(bp, &tm->tm_mday, 1, 31);
193 LEGAL_ALT(ALT_O);
194 continue;
195
196 case 'k': /* The hour (24-hour clock representation). */
197 LEGAL_ALT(0);
198 /* FALLTHROUGH */
199 case 'H':
200 bp = conv_num(bp, &tm->tm_hour, 0, 23);
201 LEGAL_ALT(ALT_O);
202 continue;
203
204 case 'l': /* The hour (12-hour clock representation). */
205 LEGAL_ALT(0);
206 /* FALLTHROUGH */
207 case 'I':
208 bp = conv_num(bp, &tm->tm_hour, 1, 12);
209 if (tm->tm_hour == 12)
210 tm->tm_hour = 0;
211 LEGAL_ALT(ALT_O);
212 continue;
213
214 case 'j': /* The day of year. */
215 i = 1;
216 bp = conv_num(bp, &i, 1, 366);
217 tm->tm_yday = i - 1;
218 LEGAL_ALT(0);
219 continue;
220
221 case 'M': /* The minute. */
222 bp = conv_num(bp, &tm->tm_min, 0, 59);
223 LEGAL_ALT(ALT_O);
224 continue;
225
226 case 'm': /* The month. */
227 i = 1;
228 bp = conv_num(bp, &i, 1, 12);
229 tm->tm_mon = i - 1;
230 LEGAL_ALT(ALT_O);
231 continue;
232
233 case 'p': /* The locale's equivalent of AM/PM. */
234 bp = find_string(bp, &i, _ctloc(am_pm), NULL, 2);
235 if (tm->tm_hour > 11)
236 return NULL;
237 tm->tm_hour += i * 12;
238 LEGAL_ALT(0);
239 continue;
240
241 case 'S': /* The seconds. */
242 bp = conv_num(bp, &tm->tm_sec, 0, 61);
243 LEGAL_ALT(ALT_O);
244 continue;
245
246 case 'U': /* The week of year, beginning on sunday. */
247 case 'W': /* The week of year, beginning on monday. */
248 /*
249 * XXX This is bogus, as we can not assume any valid
250 * information present in the tm structure at this
251 * point to calculate a real value, so just check the
252 * range for now.
253 */
254 bp = conv_num(bp, &i, 0, 53);
255 LEGAL_ALT(ALT_O);
256 continue;
257
258 case 'w': /* The day of week, beginning on sunday. */
259 bp = conv_num(bp, &tm->tm_wday, 0, 6);
260 LEGAL_ALT(ALT_O);
261 continue;
262
263 case 'u': /* The day of week, monday = 1. */
264 bp = conv_num(bp, &i, 1, 7);
265 tm->tm_wday = i % 7;
266 LEGAL_ALT(ALT_O);
267 continue;
268
269 case 'g': /* The year corresponding to the ISO week
270 * number but without the century.
271 */
272 bp = conv_num(bp, &i, 0, 99);
273 continue;
274
275 case 'G': /* The year corresponding to the ISO week
276 * number with century.
277 */
278 do
279 bp++;
280 while (isdigit(*bp));
281 continue;
282
283 case 'V': /* The ISO 8601:1988 week number as decimal */
284 bp = conv_num(bp, &i, 0, 53);
285 continue;
286
287 case 'Y': /* The year. */
288 i = TM_YEAR_BASE; /* just for data sanity... */
289 bp = conv_num(bp, &i, 0, 9999);
290 tm->tm_year = i - TM_YEAR_BASE;
291 LEGAL_ALT(ALT_E);
292 continue;
293
294 case 'y': /* The year within 100 years of the epoch. */
295 /* LEGAL_ALT(ALT_E | ALT_O); */
296 bp = conv_num(bp, &i, 0, 99);
297
298 if (split_year)
299 /* preserve century */
300 i += (tm->tm_year / 100) * 100;
301 else {
302 split_year = 1;
303 if (i <= 68)
304 i = i + 2000 - TM_YEAR_BASE;
305 else
306 i = i + 1900 - TM_YEAR_BASE;
307 }
308 tm->tm_year = i;
309 continue;
310
311 case 'Z':
312 tzset();
313 if (strncmp((const char *)bp, gmt, 3) == 0) {
314 tm->tm_isdst = 0;
315 #ifdef TM_GMTOFF
316 tm->TM_GMTOFF = 0;
317 #endif
318 #ifdef TM_ZONE
319 tm->TM_ZONE = gmt;
320 #endif
321 bp += 3;
322 } else {
323 const unsigned char *ep;
324
325 ep = find_string(bp, &i,
326 (const char * const *)tzname,
327 NULL, 2);
328 if (ep != NULL) {
329 tm->tm_isdst = i;
330 #ifdef TM_GMTOFF
331 tm->TM_GMTOFF = -(timezone);
332 #endif
333 #ifdef TM_ZONE
334 tm->TM_ZONE = tzname[i];
335 #endif
336 }
337 bp = ep;
338 }
339 continue;
340
341 case 'z':
342 /*
343 * We recognize all ISO 8601 formats:
344 * Z = Zulu time/UTC
345 * [+-]hhmm
346 * [+-]hh:mm
347 * [+-]hh
348 */
349 while (isspace(*bp))
350 bp++;
351
352 switch (*bp++) {
353 case 'Z':
354 tm->tm_isdst = 0;
355 #ifdef TM_GMTOFF
356 tm->TM_GMTOFF = 0;
357 #endif
358 #ifdef TM_ZONE
359 tm->TM_ZONE = utc;
360 #endif
361 continue;
362 case '+':
363 neg = 0;
364 break;
365 case '-':
366 neg = 1;
367 break;
368 default:
369 return NULL;
370 }
371 offs = 0;
372 for (i = 0; i < 4; ) {
373 if (isdigit(*bp)) {
374 offs = offs * 10 + (*bp++ - '0');
375 i++;
376 continue;
377 }
378 if (i == 2 && *bp == ':') {
379 bp++;
380 continue;
381 }
382 break;
383 }
384 switch (i) {
385 case 2:
386 offs *= 100;
387 break;
388 case 4:
389 i = offs % 100;
390 if (i >= 60)
391 return NULL;
392 /* Convert minutes into decimal */
393 offs = (offs / 100) * 100 + (i * 50) / 30;
394 break;
395 default:
396 return NULL;
397 }
398 tm->tm_isdst = 0; /* XXX */
399 #ifdef TM_GMTOFF
400 tm->TM_GMTOFF = offs;
401 #endif
402 #ifdef TM_ZONE
403 tm->TM_ZONE = NULL; /* XXX */
404 #endif
405 continue;
406
407 /*
408 * Miscellaneous conversions.
409 */
410 case 'n': /* Any kind of white-space. */
411 case 't':
412 while (isspace(*bp))
413 bp++;
414 LEGAL_ALT(0);
415 continue;
416
417
418 default: /* Unknown/unsupported conversion. */
419 return NULL;
420 }
421 }
422
423 return __UNCONST(bp);
424 }
425
426
/*
 * conv_num --
 *	Convert the run of decimal digits at `buf' into a number.
 *
 *	`ulim' bounds both the value and the number of digits consumed: one
 *	digit is allowed per decimal digit of `ulim', and scanning also stops
 *	as soon as appending another digit would necessarily exceed `ulim'.
 *
 *	On success the number is stored in `*dest' and a pointer to the first
 *	character after the consumed digits is returned.  NULL is returned,
 *	and `*dest' is left untouched, when `buf' does not start with a digit
 *	or the number is outside [llim, ulim].
 */
static const u_char *
conv_num(const unsigned char *buf, int *dest, uint llim, uint ulim)
{
	const unsigned char *cursor = buf;
	uint value = 0;
	uint width_budget = ulim;	/* loses one decimal digit per step */

	if (*cursor < '0' || *cursor > '9')
		return NULL;

	for (;;) {
		value = value * 10 + (uint)(*cursor - '0');
		width_budget /= 10;
		cursor++;

		if (value * 10 > ulim)
			break;		/* another digit cannot fit */
		if (width_budget == 0)
			break;		/* digit allowance used up */
		if (*cursor < '0' || *cursor > '9')
			break;		/* end of the digit run */
	}

	if (value < llim || value > ulim)
		return NULL;

	*dest = value;
	return cursor;
}
453
/*
 * find_string --
 *	Match the start of `bp', ignoring case, against the `c' strings of
 *	the table `n1' and, if none of them matches, against the `c' strings
 *	of the table `n2' (which may be NULL).  Nothing is searched when `n1'
 *	itself is NULL.
 *
 *	On a match the index of the string within its table is stored in
 *	`*tgt' and a pointer just past the matched text is returned.  NULL is
 *	returned, and `*tgt' is left untouched, when nothing matches.
 */
static const u_char *
find_string(const u_char *bp, int *tgt, const char * const *n1,
	const char * const *n2, int c)
{
	const char * const *table = n1;
	const char * const *fallback = n2;
	int idx;

	/* Full names first, then the abbreviated ones. */
	while (table != NULL) {
		for (idx = 0; idx < c; idx++) {
			const char *candidate = table[idx];
			unsigned int len = strlen(candidate);

			if (strncasecmp(candidate, (const char *)bp, len) == 0) {
				*tgt = idx;
				return bp + len;
			}
		}
		table = fallback;
		fallback = NULL;
	}

	/* Nothing matched */
	return NULL;
}
475