xref: /freebsd/lib/libc/stdtime/strptime.c (revision 57718be8fa0bd5edc11ab9a72e68cc71982939a6)
1 /*-
2  * Copyright (c) 2014 Gary Mills
3  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
4  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
5  *
6  * Copyright (c) 2011 The FreeBSD Foundation
7  * All rights reserved.
8  * Portions of this software were developed by David Chisnall
9  * under sponsorship from the FreeBSD Foundation.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer
18  *    in the documentation and/or other materials provided with the
19  *    distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
22  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
28  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
29  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
30  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
31  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32  *
33  * The views and conclusions contained in the software and documentation
34  * are those of the authors and should not be interpreted as representing
35  * official policies, either expressed or implied, of Powerdog Industries.
36  */
37 
38 #include <sys/cdefs.h>
39 #ifndef lint
40 #ifndef NOID
41 static char copyright[] __unused =
42 "@(#) Copyright (c) 1994 Powerdog Industries.  All rights reserved.";
43 static char sccsid[] __unused = "@(#)strptime.c	0.1 (Powerdog) 94/03/27";
44 #endif /* !defined NOID */
45 #endif /* not lint */
46 __FBSDID("$FreeBSD$");
47 
48 #include "namespace.h"
49 #include <time.h>
50 #include <ctype.h>
51 #include <errno.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <pthread.h>
55 #include "un-namespace.h"
56 #include "libc_private.h"
57 #include "timelocal.h"
58 #include "tzfile.h"
59 
/*
 * Forward declaration of the parsing engine.  _strptime() calls itself
 * to expand composite conversions such as %D, %T and %c.
 */
static char * _strptime(const char *, const char *, struct tm *, int *, locale_t);

/* Number of elements in the array `a'. */
#define	asizeof(a)	(sizeof(a) / sizeof((a)[0]))

/*
 * Bits kept in _strptime()'s local `flags' word.  Each bit other than
 * FLAG_NONE records that the corresponding struct tm field (tm_year,
 * tm_mon, tm_yday, tm_mday, tm_wday) has been given a value, so that the
 * remaining date fields can be derived once the format is exhausted.
 * FLAG_NONE is the value `flags' starts out with.
 */
#define	FLAG_NONE	(1 << 0)
#define	FLAG_YEAR	(1 << 1)
#define	FLAG_MONTH	(1 << 2)
#define	FLAG_YDAY	(1 << 3)
#define	FLAG_MDAY	(1 << 4)
#define	FLAG_WDAY	(1 << 5)
70 
/*
 * Return the week day (0 = Sunday ... 6 = Saturday) of January 1 of
 * the given year.  `year' is the full calendar year, e.g. 2014.
 *
 * The result is only meaningful for the Gregorian calendar, which
 * began Sept 14, 1752 in the UK and its colonies. Ref:
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */

static int
first_wday_of(int year)
{
	int century = year / 100;
	int yy = year % 100;
	int wday;

	/* Century contribution, then the year within the century. */
	wday = 2 * (3 - century % 4);
	wday += yy;
	wday += yy / 4;
	/* Leap years start one week day earlier (+6 is -1 modulo 7). */
	if (isleap(year))
		wday += 6;
	wday += 1;

	return (wday % 7);
}
84 
85 static char *
86 _strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp,
87 		locale_t locale)
88 {
89 	char	c;
90 	const char *ptr;
91 	int	day_offset = -1, wday_offset;
92 	int week_offset;
93 	int	i, len;
94 	int flags;
95 	int Ealternative, Oalternative;
96 	const struct lc_time_T *tptr = __get_current_time_locale(locale);
97 	static int start_of_month[2][13] = {
98 		{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
99 		{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
100 	};
101 
102 	flags = FLAG_NONE;
103 
104 	ptr = fmt;
105 	while (*ptr != 0) {
106 		if (*buf == 0)
107 			break;
108 
109 		c = *ptr++;
110 
111 		if (c != '%') {
112 			if (isspace_l((unsigned char)c, locale))
113 				while (*buf != 0 &&
114 				       isspace_l((unsigned char)*buf, locale))
115 					buf++;
116 			else if (c != *buf++)
117 				return (NULL);
118 			continue;
119 		}
120 
121 		Ealternative = 0;
122 		Oalternative = 0;
123 label:
124 		c = *ptr++;
125 		switch (c) {
126 		case 0:
127 		case '%':
128 			if (*buf++ != '%')
129 				return (NULL);
130 			break;
131 
132 		case '+':
133 			buf = _strptime(buf, tptr->date_fmt, tm, GMTp, locale);
134 			if (buf == NULL)
135 				return (NULL);
136 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
137 			break;
138 
139 		case 'C':
140 			if (!isdigit_l((unsigned char)*buf, locale))
141 				return (NULL);
142 
143 			/* XXX This will break for 3-digit centuries. */
144 			len = 2;
145 			for (i = 0; len && *buf != 0 &&
146 			     isdigit_l((unsigned char)*buf, locale); buf++) {
147 				i *= 10;
148 				i += *buf - '0';
149 				len--;
150 			}
151 			if (i < 19)
152 				return (NULL);
153 
154 			tm->tm_year = i * 100 - TM_YEAR_BASE;
155 			flags |= FLAG_YEAR;
156 
157 			break;
158 
159 		case 'c':
160 			buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale);
161 			if (buf == NULL)
162 				return (NULL);
163 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
164 			break;
165 
166 		case 'D':
167 			buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale);
168 			if (buf == NULL)
169 				return (NULL);
170 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
171 			break;
172 
173 		case 'E':
174 			if (Ealternative || Oalternative)
175 				break;
176 			Ealternative++;
177 			goto label;
178 
179 		case 'O':
180 			if (Ealternative || Oalternative)
181 				break;
182 			Oalternative++;
183 			goto label;
184 
185 		case 'F':
186 			buf = _strptime(buf, "%Y-%m-%d", tm, GMTp, locale);
187 			if (buf == NULL)
188 				return (NULL);
189 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
190 			break;
191 
192 		case 'R':
193 			buf = _strptime(buf, "%H:%M", tm, GMTp, locale);
194 			if (buf == NULL)
195 				return (NULL);
196 			break;
197 
198 		case 'r':
199 			buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale);
200 			if (buf == NULL)
201 				return (NULL);
202 			break;
203 
204 		case 'T':
205 			buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale);
206 			if (buf == NULL)
207 				return (NULL);
208 			break;
209 
210 		case 'X':
211 			buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale);
212 			if (buf == NULL)
213 				return (NULL);
214 			break;
215 
216 		case 'x':
217 			buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale);
218 			if (buf == NULL)
219 				return (NULL);
220 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
221 			break;
222 
223 		case 'j':
224 			if (!isdigit_l((unsigned char)*buf, locale))
225 				return (NULL);
226 
227 			len = 3;
228 			for (i = 0; len && *buf != 0 &&
229 			     isdigit_l((unsigned char)*buf, locale); buf++){
230 				i *= 10;
231 				i += *buf - '0';
232 				len--;
233 			}
234 			if (i < 1 || i > 366)
235 				return (NULL);
236 
237 			tm->tm_yday = i - 1;
238 			flags |= FLAG_YDAY;
239 
240 			break;
241 
242 		case 'M':
243 		case 'S':
244 			if (*buf == 0 ||
245 				isspace_l((unsigned char)*buf, locale))
246 				break;
247 
248 			if (!isdigit_l((unsigned char)*buf, locale))
249 				return (NULL);
250 
251 			len = 2;
252 			for (i = 0; len && *buf != 0 &&
253 				isdigit_l((unsigned char)*buf, locale); buf++){
254 				i *= 10;
255 				i += *buf - '0';
256 				len--;
257 			}
258 
259 			if (c == 'M') {
260 				if (i > 59)
261 					return (NULL);
262 				tm->tm_min = i;
263 			} else {
264 				if (i > 60)
265 					return (NULL);
266 				tm->tm_sec = i;
267 			}
268 
269 			break;
270 
271 		case 'H':
272 		case 'I':
273 		case 'k':
274 		case 'l':
275 			/*
276 			 * Of these, %l is the only specifier explicitly
277 			 * documented as not being zero-padded.  However,
278 			 * there is no harm in allowing zero-padding.
279 			 *
280 			 * XXX The %l specifier may gobble one too many
281 			 * digits if used incorrectly.
282 			 */
283 			if (!isdigit_l((unsigned char)*buf, locale))
284 				return (NULL);
285 
286 			len = 2;
287 			for (i = 0; len && *buf != 0 &&
288 			     isdigit_l((unsigned char)*buf, locale); buf++) {
289 				i *= 10;
290 				i += *buf - '0';
291 				len--;
292 			}
293 			if (c == 'H' || c == 'k') {
294 				if (i > 23)
295 					return (NULL);
296 			} else if (i > 12)
297 				return (NULL);
298 
299 			tm->tm_hour = i;
300 
301 			break;
302 
303 		case 'p':
304 			/*
305 			 * XXX This is bogus if parsed before hour-related
306 			 * specifiers.
307 			 */
308 			len = strlen(tptr->am);
309 			if (strncasecmp_l(buf, tptr->am, len, locale) == 0) {
310 				if (tm->tm_hour > 12)
311 					return (NULL);
312 				if (tm->tm_hour == 12)
313 					tm->tm_hour = 0;
314 				buf += len;
315 				break;
316 			}
317 
318 			len = strlen(tptr->pm);
319 			if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) {
320 				if (tm->tm_hour > 12)
321 					return (NULL);
322 				if (tm->tm_hour != 12)
323 					tm->tm_hour += 12;
324 				buf += len;
325 				break;
326 			}
327 
328 			return (NULL);
329 
330 		case 'A':
331 		case 'a':
332 			for (i = 0; i < asizeof(tptr->weekday); i++) {
333 				len = strlen(tptr->weekday[i]);
334 				if (strncasecmp_l(buf, tptr->weekday[i],
335 						len, locale) == 0)
336 					break;
337 				len = strlen(tptr->wday[i]);
338 				if (strncasecmp_l(buf, tptr->wday[i],
339 						len, locale) == 0)
340 					break;
341 			}
342 			if (i == asizeof(tptr->weekday))
343 				return (NULL);
344 
345 			buf += len;
346 			tm->tm_wday = i;
347 			flags |= FLAG_WDAY;
348 			break;
349 
350 		case 'U':
351 		case 'W':
352 			/*
353 			 * XXX This is bogus, as we can not assume any valid
354 			 * information present in the tm structure at this
355 			 * point to calculate a real value, so just check the
356 			 * range for now.
357 			 */
358 			if (!isdigit_l((unsigned char)*buf, locale))
359 				return (NULL);
360 
361 			len = 2;
362 			for (i = 0; len && *buf != 0 &&
363 			     isdigit_l((unsigned char)*buf, locale); buf++) {
364 				i *= 10;
365 				i += *buf - '0';
366 				len--;
367 			}
368 			if (i > 53)
369 				return (NULL);
370 
371 			if (c == 'U')
372 				day_offset = TM_SUNDAY;
373 			else
374 				day_offset = TM_MONDAY;
375 
376 
377 			week_offset = i;
378 
379 			break;
380 
381 		case 'w':
382 			if (!isdigit_l((unsigned char)*buf, locale))
383 				return (NULL);
384 
385 			i = *buf - '0';
386 			if (i > 6)
387 				return (NULL);
388 
389 			tm->tm_wday = i;
390 			flags |= FLAG_WDAY;
391 
392 			break;
393 
394 		case 'e':
395 			/*
396 			 * With %e format, our strftime(3) adds a blank space
397 			 * before single digits.
398 			 */
399 			if (*buf != 0 &&
400 			    isspace_l((unsigned char)*buf, locale))
401 			       buf++;
402 			/* FALLTHROUGH */
403 		case 'd':
404 			/*
405 			 * The %e specifier was once explicitly documented as
406 			 * not being zero-padded but was later changed to
407 			 * equivalent to %d.  There is no harm in allowing
408 			 * such padding.
409 			 *
410 			 * XXX The %e specifier may gobble one too many
411 			 * digits if used incorrectly.
412 			 */
413 			if (!isdigit_l((unsigned char)*buf, locale))
414 				return (NULL);
415 
416 			len = 2;
417 			for (i = 0; len && *buf != 0 &&
418 			     isdigit_l((unsigned char)*buf, locale); buf++) {
419 				i *= 10;
420 				i += *buf - '0';
421 				len--;
422 			}
423 			if (i > 31)
424 				return (NULL);
425 
426 			tm->tm_mday = i;
427 			flags |= FLAG_MDAY;
428 
429 			break;
430 
431 		case 'B':
432 		case 'b':
433 		case 'h':
434 			for (i = 0; i < asizeof(tptr->month); i++) {
435 				if (Oalternative) {
436 					if (c == 'B') {
437 						len = strlen(tptr->alt_month[i]);
438 						if (strncasecmp_l(buf,
439 								tptr->alt_month[i],
440 								len, locale) == 0)
441 							break;
442 					}
443 				} else {
444 					len = strlen(tptr->month[i]);
445 					if (strncasecmp_l(buf, tptr->month[i],
446 							len, locale) == 0)
447 						break;
448 				}
449 			}
450 			/*
451 			 * Try the abbreviated month name if the full name
452 			 * wasn't found and Oalternative was not requested.
453 			 */
454 			if (i == asizeof(tptr->month) && !Oalternative) {
455 				for (i = 0; i < asizeof(tptr->month); i++) {
456 					len = strlen(tptr->mon[i]);
457 					if (strncasecmp_l(buf, tptr->mon[i],
458 							len, locale) == 0)
459 						break;
460 				}
461 			}
462 			if (i == asizeof(tptr->month))
463 				return (NULL);
464 
465 			tm->tm_mon = i;
466 			buf += len;
467 			flags |= FLAG_MONTH;
468 
469 			break;
470 
471 		case 'm':
472 			if (!isdigit_l((unsigned char)*buf, locale))
473 				return (NULL);
474 
475 			len = 2;
476 			for (i = 0; len && *buf != 0 &&
477 			     isdigit_l((unsigned char)*buf, locale); buf++) {
478 				i *= 10;
479 				i += *buf - '0';
480 				len--;
481 			}
482 			if (i < 1 || i > 12)
483 				return (NULL);
484 
485 			tm->tm_mon = i - 1;
486 			flags |= FLAG_MONTH;
487 
488 			break;
489 
490 		case 's':
491 			{
492 			char *cp;
493 			int sverrno;
494 			long n;
495 			time_t t;
496 
497 			sverrno = errno;
498 			errno = 0;
499 			n = strtol_l(buf, &cp, 10, locale);
500 			if (errno == ERANGE || (long)(t = n) != n) {
501 				errno = sverrno;
502 				return (NULL);
503 			}
504 			errno = sverrno;
505 			buf = cp;
506 			if (gmtime_r(&t, tm) == NULL)
507 				return (NULL);
508 			*GMTp = 1;
509 			flags |= FLAG_YDAY | FLAG_WDAY | FLAG_MONTH |
510 			    FLAG_MDAY | FLAG_YEAR;
511 			}
512 			break;
513 
514 		case 'Y':
515 		case 'y':
516 			if (*buf == 0 ||
517 			    isspace_l((unsigned char)*buf, locale))
518 				break;
519 
520 			if (!isdigit_l((unsigned char)*buf, locale))
521 				return (NULL);
522 
523 			len = (c == 'Y') ? 4 : 2;
524 			for (i = 0; len && *buf != 0 &&
525 			     isdigit_l((unsigned char)*buf, locale); buf++) {
526 				i *= 10;
527 				i += *buf - '0';
528 				len--;
529 			}
530 			if (c == 'Y')
531 				i -= TM_YEAR_BASE;
532 			if (c == 'y' && i < 69)
533 				i += 100;
534 			if (i < 0)
535 				return (NULL);
536 
537 			tm->tm_year = i;
538 			flags |= FLAG_YEAR;
539 
540 			break;
541 
542 		case 'Z':
543 			{
544 			const char *cp;
545 			char *zonestr;
546 
547 			for (cp = buf; *cp &&
548 			     isupper_l((unsigned char)*cp, locale); ++cp) {
549 				/*empty*/}
550 			if (cp - buf) {
551 				zonestr = alloca(cp - buf + 1);
552 				strncpy(zonestr, buf, cp - buf);
553 				zonestr[cp - buf] = '\0';
554 				tzset();
555 				if (0 == strcmp(zonestr, "GMT")) {
556 				    *GMTp = 1;
557 				} else if (0 == strcmp(zonestr, tzname[0])) {
558 				    tm->tm_isdst = 0;
559 				} else if (0 == strcmp(zonestr, tzname[1])) {
560 				    tm->tm_isdst = 1;
561 				} else {
562 				    return (NULL);
563 				}
564 				buf += cp - buf;
565 			}
566 			}
567 			break;
568 
569 		case 'z':
570 			{
571 			int sign = 1;
572 
573 			if (*buf != '+') {
574 				if (*buf == '-')
575 					sign = -1;
576 				else
577 					return (NULL);
578 			}
579 
580 			buf++;
581 			i = 0;
582 			for (len = 4; len > 0; len--) {
583 				if (isdigit_l((unsigned char)*buf, locale)) {
584 					i *= 10;
585 					i += *buf - '0';
586 					buf++;
587 				} else
588 					return (NULL);
589 			}
590 
591 			tm->tm_hour -= sign * (i / 100);
592 			tm->tm_min  -= sign * (i % 100);
593 			*GMTp = 1;
594 			}
595 			break;
596 
597 		case 'n':
598 		case 't':
599 			while (isspace_l((unsigned char)*buf, locale))
600 				buf++;
601 			break;
602 		}
603 	}
604 
605 	if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) {
606 		if ((flags & (FLAG_MONTH | FLAG_MDAY)) ==
607 		    (FLAG_MONTH | FLAG_MDAY)) {
608 			tm->tm_yday = start_of_month[isleap(tm->tm_year +
609 			    TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
610 			flags |= FLAG_YDAY;
611 		} else if (day_offset != -1) {
612 			/* Set the date to the first Sunday (or Monday)
613 			 * of the specified week of the year.
614 			 */
615 			if (!(flags & FLAG_WDAY)) {
616 				tm->tm_wday = day_offset;
617 				flags |= FLAG_WDAY;
618 			}
619 			tm->tm_yday = (7 -
620 			    first_wday_of(tm->tm_year + TM_YEAR_BASE) +
621 			    day_offset) % 7 + (week_offset - 1) * 7 +
622 			    tm->tm_wday - day_offset;
623 			flags |= FLAG_YDAY;
624 		}
625 	}
626 
627 	if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) {
628 		if (!(flags & FLAG_MONTH)) {
629 			i = 0;
630 			while (tm->tm_yday >=
631 			    start_of_month[isleap(tm->tm_year +
632 			    TM_YEAR_BASE)][i])
633 				i++;
634 			if (i > 12) {
635 				i = 1;
636 				tm->tm_yday -=
637 				    start_of_month[isleap(tm->tm_year +
638 				    TM_YEAR_BASE)][12];
639 				tm->tm_year++;
640 			}
641 			tm->tm_mon = i - 1;
642 			flags |= FLAG_MONTH;
643 		}
644 		if (!(flags & FLAG_MDAY)) {
645 			tm->tm_mday = tm->tm_yday -
646 			    start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)]
647 			    [tm->tm_mon] + 1;
648 			flags |= FLAG_MDAY;
649 		}
650 		if (!(flags & FLAG_WDAY)) {
651 			i = 0;
652 			wday_offset = first_wday_of(tm->tm_year);
653 			while (i++ <= tm->tm_yday) {
654 				if (wday_offset++ >= 6)
655 					wday_offset = 0;
656 			}
657 			tm->tm_wday = wday_offset;
658 			flags |= FLAG_WDAY;
659 		}
660 	}
661 
662 	return ((char *)buf);
663 }
664 
665 char *
666 strptime_l(const char * __restrict buf, const char * __restrict fmt,
667     struct tm * __restrict tm, locale_t loc)
668 {
669 	char *ret;
670 	int gmt;
671 	FIX_LOCALE(loc);
672 
673 	gmt = 0;
674 	ret = _strptime(buf, fmt, tm, &gmt, loc);
675 	if (ret && gmt) {
676 		time_t t = timegm(tm);
677 		localtime_r(&t, tm);
678 	}
679 
680 	return (ret);
681 }
682 
683 char *
684 strptime(const char * __restrict buf, const char * __restrict fmt,
685     struct tm * __restrict tm)
686 {
687 	return strptime_l(buf, fmt, tm, __get_locale());
688 }
689