xref: /freebsd/lib/libc/stdtime/strptime.c (revision 02e9120893770924227138ba49df1edb3896112a)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2014 Gary Mills
5  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
6  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  *
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer
20  *    in the documentation and/or other materials provided with the
21  *    distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
24  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
32  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * The views and conclusions contained in the software and documentation
36  * are those of the authors and should not be interpreted as representing
37  * official policies, either expressed or implied, of Powerdog Industries.
38  */
39 
40 #include "namespace.h"
41 #include <time.h>
42 #include <ctype.h>
43 #include <errno.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <pthread.h>
47 #include "private.h"
48 #include "un-namespace.h"
49 #include "libc_private.h"
50 #include "timelocal.h"
51 #include "tzfile.h"
52 
/*
 * Forward declaration of the parser defined further down in this file;
 * _strptime() calls itself recursively for composite conversions.
 */
static char * _strptime(const char *, const char *, struct tm *, int *, locale_t);

/* Element count of an array object (must not be applied to a pointer). */
#define	asizeof(a)	(sizeof(a) / sizeof((a)[0]))

/*
 * Bit flags accumulated in the `flags' local of _strptime() to record
 * which date fields the format supplied.  FLAG_NONE is the initial value
 * of `flags'; note that it occupies bit 0 rather than being zero.
 */
#define	FLAG_NONE	(1 << 0)
#define	FLAG_YEAR	(1 << 1)
#define	FLAG_MONTH	(1 << 2)
#define	FLAG_YDAY	(1 << 3)
#define	FLAG_MDAY	(1 << 4)
#define	FLAG_WDAY	(1 << 5)
63 
/*
 * Return the day of the week on which January 1st of `year' falls,
 * numbered like tm_wday (0 = Sunday, 1 = Monday, ...).
 *
 * `year' is the full calendar year (e.g. 2024), not an offset from
 * TM_YEAR_BASE.  The formula is only valid for the Gregorian calendar,
 * which began Sept 14, 1752 in the UK and its colonies.  Ref:
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */

static int
first_wday_of(int year)
{
	int century_term, year_term, leap_term;

	/* Contribution of the century, repeating every 400 years. */
	century_term = 2 * (3 - (year / 100) % 4);
	/* Contribution of the two-digit year and the leap days within it. */
	year_term = (year % 100) + ((year % 100) / 4);
	/* January of a leap year precedes the leap day: step back one day. */
	leap_term = isleap(year) ? 6 : 0;

	return ((century_term + year_term + leap_term + 1) % 7);
}
77 
78 static char *
79 _strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp,
80 		locale_t locale)
81 {
82 	char	c;
83 	const char *ptr;
84 	int	day_offset = -1, wday_offset;
85 	int week_offset;
86 	int	i, len;
87 	int flags;
88 	int Ealternative, Oalternative;
89 	int century, year;
90 	const struct lc_time_T *tptr = __get_current_time_locale(locale);
91 	static int start_of_month[2][13] = {
92 		{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
93 		{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
94 	};
95 
96 	flags = FLAG_NONE;
97 	century = -1;
98 	year = -1;
99 
100 	ptr = fmt;
101 	while (*ptr != 0) {
102 		c = *ptr++;
103 
104 		if (c != '%') {
105 			if (isspace_l((unsigned char)c, locale))
106 				while (*buf != 0 &&
107 				       isspace_l((unsigned char)*buf, locale))
108 					buf++;
109 			else if (c != *buf++)
110 				return (NULL);
111 			continue;
112 		}
113 
114 		Ealternative = 0;
115 		Oalternative = 0;
116 label:
117 		c = *ptr++;
118 		switch (c) {
119 		case '%':
120 			if (*buf++ != '%')
121 				return (NULL);
122 			break;
123 
124 		case '+':
125 			buf = _strptime(buf, tptr->date_fmt, tm, GMTp, locale);
126 			if (buf == NULL)
127 				return (NULL);
128 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
129 			break;
130 
131 		case 'C':
132 			if (!isdigit_l((unsigned char)*buf, locale))
133 				return (NULL);
134 
135 			/* XXX This will break for 3-digit centuries. */
136 			len = 2;
137 			for (i = 0; len && *buf != 0 &&
138 			     isdigit_l((unsigned char)*buf, locale); buf++) {
139 				i *= 10;
140 				i += *buf - '0';
141 				len--;
142 			}
143 
144 			century = i;
145 			flags |= FLAG_YEAR;
146 
147 			break;
148 
149 		case 'c':
150 			buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale);
151 			if (buf == NULL)
152 				return (NULL);
153 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
154 			break;
155 
156 		case 'D':
157 			buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale);
158 			if (buf == NULL)
159 				return (NULL);
160 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
161 			break;
162 
163 		case 'E':
164 			if (Ealternative || Oalternative)
165 				break;
166 			Ealternative++;
167 			goto label;
168 
169 		case 'O':
170 			if (Ealternative || Oalternative)
171 				break;
172 			Oalternative++;
173 			goto label;
174 
175 		case 'F':
176 			buf = _strptime(buf, "%Y-%m-%d", tm, GMTp, locale);
177 			if (buf == NULL)
178 				return (NULL);
179 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
180 			break;
181 
182 		case 'R':
183 			buf = _strptime(buf, "%H:%M", tm, GMTp, locale);
184 			if (buf == NULL)
185 				return (NULL);
186 			break;
187 
188 		case 'r':
189 			buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale);
190 			if (buf == NULL)
191 				return (NULL);
192 			break;
193 
194 		case 'T':
195 			buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale);
196 			if (buf == NULL)
197 				return (NULL);
198 			break;
199 
200 		case 'X':
201 			buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale);
202 			if (buf == NULL)
203 				return (NULL);
204 			break;
205 
206 		case 'x':
207 			buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale);
208 			if (buf == NULL)
209 				return (NULL);
210 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
211 			break;
212 
213 		case 'j':
214 			if (!isdigit_l((unsigned char)*buf, locale))
215 				return (NULL);
216 
217 			len = 3;
218 			for (i = 0; len && *buf != 0 &&
219 			     isdigit_l((unsigned char)*buf, locale); buf++){
220 				i *= 10;
221 				i += *buf - '0';
222 				len--;
223 			}
224 			if (i < 1 || i > 366)
225 				return (NULL);
226 
227 			tm->tm_yday = i - 1;
228 			flags |= FLAG_YDAY;
229 
230 			break;
231 
232 		case 'M':
233 		case 'S':
234 			if (*buf == 0 ||
235 				isspace_l((unsigned char)*buf, locale))
236 				break;
237 
238 			if (!isdigit_l((unsigned char)*buf, locale))
239 				return (NULL);
240 
241 			len = 2;
242 			for (i = 0; len && *buf != 0 &&
243 				isdigit_l((unsigned char)*buf, locale); buf++){
244 				i *= 10;
245 				i += *buf - '0';
246 				len--;
247 			}
248 
249 			if (c == 'M') {
250 				if (i > 59)
251 					return (NULL);
252 				tm->tm_min = i;
253 			} else {
254 				if (i > 60)
255 					return (NULL);
256 				tm->tm_sec = i;
257 			}
258 
259 			break;
260 
261 		case 'H':
262 		case 'I':
263 		case 'k':
264 		case 'l':
265 			/*
266 			 * %k and %l specifiers are documented as being
267 			 * blank-padded.  However, there is no harm in
268 			 * allowing zero-padding.
269 			 *
270 			 * XXX %k and %l specifiers may gobble one too many
271 			 * digits if used incorrectly.
272 			 */
273 
274 			len = 2;
275 			if ((c == 'k' || c == 'l') &&
276 			    isblank_l((unsigned char)*buf, locale)) {
277 				buf++;
278 				len = 1;
279 			}
280 
281 			if (!isdigit_l((unsigned char)*buf, locale))
282 				return (NULL);
283 
284 			for (i = 0; len && *buf != 0 &&
285 			     isdigit_l((unsigned char)*buf, locale); buf++) {
286 				i *= 10;
287 				i += *buf - '0';
288 				len--;
289 			}
290 			if (c == 'H' || c == 'k') {
291 				if (i > 23)
292 					return (NULL);
293 			} else if (i == 0 || i > 12)
294 				return (NULL);
295 
296 			tm->tm_hour = i;
297 
298 			break;
299 
300 		case 'p':
301 			/*
302 			 * XXX This is bogus if parsed before hour-related
303 			 * specifiers.
304 			 */
305 			if (tm->tm_hour > 12)
306 				return (NULL);
307 
308 			len = strlen(tptr->am);
309 			if (strncasecmp_l(buf, tptr->am, len, locale) == 0) {
310 				if (tm->tm_hour == 12)
311 					tm->tm_hour = 0;
312 				buf += len;
313 				break;
314 			}
315 
316 			len = strlen(tptr->pm);
317 			if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) {
318 				if (tm->tm_hour != 12)
319 					tm->tm_hour += 12;
320 				buf += len;
321 				break;
322 			}
323 
324 			return (NULL);
325 
326 		case 'A':
327 		case 'a':
328 			for (i = 0; i < asizeof(tptr->weekday); i++) {
329 				len = strlen(tptr->weekday[i]);
330 				if (strncasecmp_l(buf, tptr->weekday[i],
331 						len, locale) == 0)
332 					break;
333 				len = strlen(tptr->wday[i]);
334 				if (strncasecmp_l(buf, tptr->wday[i],
335 						len, locale) == 0)
336 					break;
337 			}
338 			if (i == asizeof(tptr->weekday))
339 				return (NULL);
340 
341 			buf += len;
342 			tm->tm_wday = i;
343 			flags |= FLAG_WDAY;
344 			break;
345 
346 		case 'U':
347 		case 'W':
348 			/*
349 			 * XXX This is bogus, as we can not assume any valid
350 			 * information present in the tm structure at this
351 			 * point to calculate a real value, so just check the
352 			 * range for now.
353 			 */
354 			if (!isdigit_l((unsigned char)*buf, locale))
355 				return (NULL);
356 
357 			len = 2;
358 			for (i = 0; len && *buf != 0 &&
359 			     isdigit_l((unsigned char)*buf, locale); buf++) {
360 				i *= 10;
361 				i += *buf - '0';
362 				len--;
363 			}
364 			if (i > 53)
365 				return (NULL);
366 
367 			if (c == 'U')
368 				day_offset = TM_SUNDAY;
369 			else
370 				day_offset = TM_MONDAY;
371 
372 
373 			week_offset = i;
374 
375 			break;
376 
377 		case 'u':
378 		case 'w':
379 			if (!isdigit_l((unsigned char)*buf, locale))
380 				return (NULL);
381 
382 			i = *buf++ - '0';
383 			if (i < 0 || i > 7 || (c == 'u' && i < 1) ||
384 			    (c == 'w' && i > 6))
385 				return (NULL);
386 
387 			tm->tm_wday = i % 7;
388 			flags |= FLAG_WDAY;
389 
390 			break;
391 
392 		case 'e':
393 			/*
394 			 * With %e format, our strftime(3) adds a blank space
395 			 * before single digits.
396 			 */
397 			if (*buf != 0 &&
398 			    isspace_l((unsigned char)*buf, locale))
399 			       buf++;
400 			/* FALLTHROUGH */
401 		case 'd':
402 			/*
403 			 * The %e specifier was once explicitly documented as
404 			 * not being zero-padded but was later changed to
405 			 * equivalent to %d.  There is no harm in allowing
406 			 * such padding.
407 			 *
408 			 * XXX The %e specifier may gobble one too many
409 			 * digits if used incorrectly.
410 			 */
411 			if (!isdigit_l((unsigned char)*buf, locale))
412 				return (NULL);
413 
414 			len = 2;
415 			for (i = 0; len && *buf != 0 &&
416 			     isdigit_l((unsigned char)*buf, locale); buf++) {
417 				i *= 10;
418 				i += *buf - '0';
419 				len--;
420 			}
421 			if (i == 0 || i > 31)
422 				return (NULL);
423 
424 			tm->tm_mday = i;
425 			flags |= FLAG_MDAY;
426 
427 			break;
428 
429 		case 'B':
430 		case 'b':
431 		case 'h':
432 			for (i = 0; i < asizeof(tptr->month); i++) {
433 				if (Oalternative) {
434 					if (c == 'B') {
435 						len = strlen(tptr->alt_month[i]);
436 						if (strncasecmp_l(buf,
437 								tptr->alt_month[i],
438 								len, locale) == 0)
439 							break;
440 					}
441 				} else {
442 					len = strlen(tptr->month[i]);
443 					if (strncasecmp_l(buf, tptr->month[i],
444 							len, locale) == 0)
445 						break;
446 				}
447 			}
448 			/*
449 			 * Try the abbreviated month name if the full name
450 			 * wasn't found and Oalternative was not requested.
451 			 */
452 			if (i == asizeof(tptr->month) && !Oalternative) {
453 				for (i = 0; i < asizeof(tptr->month); i++) {
454 					len = strlen(tptr->mon[i]);
455 					if (strncasecmp_l(buf, tptr->mon[i],
456 							len, locale) == 0)
457 						break;
458 				}
459 			}
460 			if (i == asizeof(tptr->month))
461 				return (NULL);
462 
463 			tm->tm_mon = i;
464 			buf += len;
465 			flags |= FLAG_MONTH;
466 
467 			break;
468 
469 		case 'm':
470 			if (!isdigit_l((unsigned char)*buf, locale))
471 				return (NULL);
472 
473 			len = 2;
474 			for (i = 0; len && *buf != 0 &&
475 			     isdigit_l((unsigned char)*buf, locale); buf++) {
476 				i *= 10;
477 				i += *buf - '0';
478 				len--;
479 			}
480 			if (i < 1 || i > 12)
481 				return (NULL);
482 
483 			tm->tm_mon = i - 1;
484 			flags |= FLAG_MONTH;
485 
486 			break;
487 
488 		case 's':
489 			{
490 			char *cp;
491 			int sverrno;
492 			long n;
493 			time_t t;
494 
495 			sverrno = errno;
496 			errno = 0;
497 			n = strtol_l(buf, &cp, 10, locale);
498 			if (errno == ERANGE || (long)(t = n) != n) {
499 				errno = sverrno;
500 				return (NULL);
501 			}
502 			errno = sverrno;
503 			buf = cp;
504 			if (gmtime_r(&t, tm) == NULL)
505 				return (NULL);
506 			*GMTp = 1;
507 			flags |= FLAG_YDAY | FLAG_WDAY | FLAG_MONTH |
508 			    FLAG_MDAY | FLAG_YEAR;
509 			}
510 			break;
511 
512 		case 'Y':
513 		case 'y':
514 			if (*buf == 0 ||
515 			    isspace_l((unsigned char)*buf, locale))
516 				break;
517 
518 			if (!isdigit_l((unsigned char)*buf, locale))
519 				return (NULL);
520 
521 			len = (c == 'Y') ? 4 : 2;
522 			for (i = 0; len && *buf != 0 &&
523 			     isdigit_l((unsigned char)*buf, locale); buf++) {
524 				i *= 10;
525 				i += *buf - '0';
526 				len--;
527 			}
528 			if (c == 'Y')
529 				century = i / 100;
530 			year = i % 100;
531 
532 			flags |= FLAG_YEAR;
533 
534 			break;
535 
536 		case 'Z':
537 			{
538 			const char *cp;
539 			char *zonestr;
540 
541 			for (cp = buf; *cp &&
542 			     isupper_l((unsigned char)*cp, locale); ++cp) {
543 				/*empty*/}
544 			if (cp - buf) {
545 				zonestr = alloca(cp - buf + 1);
546 				strncpy(zonestr, buf, cp - buf);
547 				zonestr[cp - buf] = '\0';
548 				tzset();
549 				if (0 == strcmp(zonestr, "GMT") ||
550 				    0 == strcmp(zonestr, "UTC")) {
551 				    *GMTp = 1;
552 				} else if (0 == strcmp(zonestr, tzname[0])) {
553 				    tm->tm_isdst = 0;
554 				} else if (0 == strcmp(zonestr, tzname[1])) {
555 				    tm->tm_isdst = 1;
556 				} else {
557 				    return (NULL);
558 				}
559 				buf += cp - buf;
560 			}
561 			}
562 			break;
563 
564 		case 'z':
565 			{
566 			int sign = 1;
567 
568 			if (*buf != '+') {
569 				if (*buf == '-')
570 					sign = -1;
571 				else
572 					return (NULL);
573 			}
574 
575 			buf++;
576 			i = 0;
577 			for (len = 4; len > 0; len--) {
578 				if (isdigit_l((unsigned char)*buf, locale)) {
579 					i *= 10;
580 					i += *buf - '0';
581 					buf++;
582 				} else if (len == 2) {
583 					i *= 100;
584 					break;
585 				} else
586 					return (NULL);
587 			}
588 
589 			if (i > 1400 || (sign == -1 && i > 1200) ||
590 			    (i % 100) >= 60)
591 				return (NULL);
592 			tm->tm_hour -= sign * (i / 100);
593 			tm->tm_min  -= sign * (i % 100);
594 			*GMTp = 1;
595 			}
596 			break;
597 
598 		case 'n':
599 		case 't':
600 			while (isspace_l((unsigned char)*buf, locale))
601 				buf++;
602 			break;
603 
604 		default:
605 			return (NULL);
606 		}
607 	}
608 
609 	if (century != -1 || year != -1) {
610 		if (year == -1)
611 			year = 0;
612 		if (century == -1) {
613 			if (year < 69)
614 				year += 100;
615 		} else
616 			year += century * 100 - TM_YEAR_BASE;
617 		tm->tm_year = year;
618 	}
619 
620 	if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) {
621 		if ((flags & (FLAG_MONTH | FLAG_MDAY)) ==
622 		    (FLAG_MONTH | FLAG_MDAY)) {
623 			tm->tm_yday = start_of_month[isleap(tm->tm_year +
624 			    TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
625 			flags |= FLAG_YDAY;
626 		} else if (day_offset != -1) {
627 			int tmpwday, tmpyday, fwo;
628 
629 			fwo = first_wday_of(tm->tm_year + TM_YEAR_BASE);
630 			/* No incomplete week (week 0). */
631 			if (week_offset == 0 && fwo == day_offset)
632 				return (NULL);
633 
634 			/* Set the date to the first Sunday (or Monday)
635 			 * of the specified week of the year.
636 			 */
637 			tmpwday = (flags & FLAG_WDAY) ? tm->tm_wday :
638 			    day_offset;
639 			tmpyday = (7 - fwo + day_offset) % 7 +
640 			    (week_offset - 1) * 7 +
641 			    (tmpwday - day_offset + 7) % 7;
642 			/* Impossible yday for incomplete week (week 0). */
643 			if (tmpyday < 0) {
644 				if (flags & FLAG_WDAY)
645 					return (NULL);
646 				tmpyday = 0;
647 			}
648 			tm->tm_yday = tmpyday;
649 			flags |= FLAG_YDAY;
650 		}
651 	}
652 
653 	if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) {
654 		if (!(flags & FLAG_MONTH)) {
655 			i = 0;
656 			while (tm->tm_yday >=
657 			    start_of_month[isleap(tm->tm_year +
658 			    TM_YEAR_BASE)][i])
659 				i++;
660 			if (i > 12) {
661 				i = 1;
662 				tm->tm_yday -=
663 				    start_of_month[isleap(tm->tm_year +
664 				    TM_YEAR_BASE)][12];
665 				tm->tm_year++;
666 			}
667 			tm->tm_mon = i - 1;
668 			flags |= FLAG_MONTH;
669 		}
670 		if (!(flags & FLAG_MDAY)) {
671 			tm->tm_mday = tm->tm_yday -
672 			    start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)]
673 			    [tm->tm_mon] + 1;
674 			flags |= FLAG_MDAY;
675 		}
676 		if (!(flags & FLAG_WDAY)) {
677 			i = 0;
678 			wday_offset = first_wday_of(tm->tm_year);
679 			while (i++ <= tm->tm_yday) {
680 				if (wday_offset++ >= 6)
681 					wday_offset = 0;
682 			}
683 			tm->tm_wday = wday_offset;
684 			flags |= FLAG_WDAY;
685 		}
686 	}
687 
688 	return ((char *)buf);
689 }
690 
691 char *
692 strptime_l(const char * __restrict buf, const char * __restrict fmt,
693     struct tm * __restrict tm, locale_t loc)
694 {
695 	char *ret;
696 	int gmt;
697 	FIX_LOCALE(loc);
698 
699 	gmt = 0;
700 	ret = _strptime(buf, fmt, tm, &gmt, loc);
701 	if (ret && gmt) {
702 		time_t t = timegm(tm);
703 
704 		localtime_r(&t, tm);
705 	}
706 
707 	return (ret);
708 }
709 
710 char *
711 strptime(const char * __restrict buf, const char * __restrict fmt,
712     struct tm * __restrict tm)
713 {
714 	return strptime_l(buf, fmt, tm, __get_locale());
715 }
716