xref: /freebsd/lib/libc/stdtime/strptime.c (revision 3a56015a2f5d630910177fa79a522bb95511ccf7)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2014 Gary Mills
5  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
6  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  *
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer
20  *    in the documentation and/or other materials provided with the
21  *    distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
24  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
32  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * The views and conclusions contained in the software and documentation
36  * are those of the authors and should not be interpreted as representing
37  * official policies, either expressed or implied, of Powerdog Industries.
38  */
39 
40 #include "namespace.h"
41 #include <time.h>
42 #include <ctype.h>
43 #include <errno.h>
44 #include <stdlib.h>
45 #include <string.h>
46 #include <pthread.h>
47 #include "private.h"
48 #include "un-namespace.h"
49 #include "libc_private.h"
50 #include "timelocal.h"
51 #include "tzfile.h"
52 
53 static char * _strptime(const char *, const char *, struct tm *, int *, locale_t);
54 
/* Element count of a true array; not valid for pointers. */
#define	asizeof(a)	(sizeof(a) / sizeof(*(a)))

/*
 * Bit flags used by _strptime() to remember which calendar fields have
 * been supplied so far.  FLAG_NONE is the initial value of the flag word.
 */
#define	FLAG_NONE	0x01
#define	FLAG_YEAR	0x02
#define	FLAG_MONTH	0x04
#define	FLAG_YDAY	0x08
#define	FLAG_MDAY	0x10
#define	FLAG_WDAY	0x20
63 
/*
 * Gauss's algorithm for the day of the week of the first day of any year
 * in the (proleptic) Gregorian calendar.
 *
 * Returns 0 for Sunday through 6 for Saturday.
 *
 * The algorithm needs non-negative residues of (year - 1).  C's `%'
 * truncates toward zero, so for year <= 0 the raw remainders would be
 * negative and the result wrong (or negative); each residue is therefore
 * folded back into its non-negative range before use.
 */
static int
first_wday_of(int year)
{
	int prev = year - 1;
	int r4 = prev % 4;
	int r100 = prev % 100;
	int r400 = prev % 400;

	if (r4 < 0)
		r4 += 4;
	if (r100 < 0)
		r100 += 100;
	if (r400 < 0)
		r400 += 400;

	return ((1 + 5 * r4 + 4 * r100 + 6 * r400) % 7);
}
76 
77 static char *
78 _strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp,
79 		locale_t locale)
80 {
81 	char	c;
82 	const char *ptr;
83 	int	day_offset = -1, wday_offset;
84 	int week_offset;
85 	int	i, len;
86 	int flags;
87 	int Ealternative, Oalternative;
88 	int century, year;
89 	const struct lc_time_T *tptr = __get_current_time_locale(locale);
90 	static int start_of_month[2][13] = {
91 		{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
92 		{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
93 	};
94 
95 	flags = FLAG_NONE;
96 	century = -1;
97 	year = -1;
98 
99 	ptr = fmt;
100 	while (*ptr != 0) {
101 		c = *ptr++;
102 
103 		if (c != '%') {
104 			if (isspace_l((unsigned char)c, locale))
105 				while (*buf != 0 &&
106 				       isspace_l((unsigned char)*buf, locale))
107 					buf++;
108 			else if (c != *buf++)
109 				return (NULL);
110 			continue;
111 		}
112 
113 		Ealternative = 0;
114 		Oalternative = 0;
115 label:
116 		c = *ptr++;
117 		switch (c) {
118 		case '%':
119 			if (*buf++ != '%')
120 				return (NULL);
121 			break;
122 
123 		case '+':
124 			buf = _strptime(buf, tptr->date_fmt, tm, GMTp, locale);
125 			if (buf == NULL)
126 				return (NULL);
127 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
128 			break;
129 
130 		case 'C':
131 			if (!isdigit_l((unsigned char)*buf, locale))
132 				return (NULL);
133 
134 			/* XXX This will break for 3-digit centuries. */
135 			len = 2;
136 			for (i = 0; len && *buf != 0 &&
137 			     isdigit_l((unsigned char)*buf, locale); buf++) {
138 				i *= 10;
139 				i += *buf - '0';
140 				len--;
141 			}
142 
143 			century = i;
144 			flags |= FLAG_YEAR;
145 
146 			break;
147 
148 		case 'c':
149 			buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale);
150 			if (buf == NULL)
151 				return (NULL);
152 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
153 			break;
154 
155 		case 'D':
156 			buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale);
157 			if (buf == NULL)
158 				return (NULL);
159 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
160 			break;
161 
162 		case 'E':
163 			if (Ealternative || Oalternative)
164 				break;
165 			Ealternative++;
166 			goto label;
167 
168 		case 'O':
169 			if (Ealternative || Oalternative)
170 				break;
171 			Oalternative++;
172 			goto label;
173 
174 		case 'F':
175 			buf = _strptime(buf, "%Y-%m-%d", tm, GMTp, locale);
176 			if (buf == NULL)
177 				return (NULL);
178 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
179 			break;
180 
181 		case 'R':
182 			buf = _strptime(buf, "%H:%M", tm, GMTp, locale);
183 			if (buf == NULL)
184 				return (NULL);
185 			break;
186 
187 		case 'r':
188 			buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale);
189 			if (buf == NULL)
190 				return (NULL);
191 			break;
192 
193 		case 'T':
194 			buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale);
195 			if (buf == NULL)
196 				return (NULL);
197 			break;
198 
199 		case 'X':
200 			buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale);
201 			if (buf == NULL)
202 				return (NULL);
203 			break;
204 
205 		case 'x':
206 			buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale);
207 			if (buf == NULL)
208 				return (NULL);
209 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
210 			break;
211 
212 		case 'j':
213 			if (!isdigit_l((unsigned char)*buf, locale))
214 				return (NULL);
215 
216 			len = 3;
217 			for (i = 0; len && *buf != 0 &&
218 			     isdigit_l((unsigned char)*buf, locale); buf++){
219 				i *= 10;
220 				i += *buf - '0';
221 				len--;
222 			}
223 			if (i < 1 || i > 366)
224 				return (NULL);
225 
226 			tm->tm_yday = i - 1;
227 			flags |= FLAG_YDAY;
228 
229 			break;
230 
231 		case 'M':
232 		case 'S':
233 			if (*buf == 0 ||
234 				isspace_l((unsigned char)*buf, locale))
235 				break;
236 
237 			if (!isdigit_l((unsigned char)*buf, locale))
238 				return (NULL);
239 
240 			len = 2;
241 			for (i = 0; len && *buf != 0 &&
242 				isdigit_l((unsigned char)*buf, locale); buf++){
243 				i *= 10;
244 				i += *buf - '0';
245 				len--;
246 			}
247 
248 			if (c == 'M') {
249 				if (i > 59)
250 					return (NULL);
251 				tm->tm_min = i;
252 			} else {
253 				if (i > 60)
254 					return (NULL);
255 				tm->tm_sec = i;
256 			}
257 
258 			break;
259 
260 		case 'H':
261 		case 'I':
262 		case 'k':
263 		case 'l':
264 			/*
265 			 * %k and %l specifiers are documented as being
266 			 * blank-padded.  However, there is no harm in
267 			 * allowing zero-padding.
268 			 *
269 			 * XXX %k and %l specifiers may gobble one too many
270 			 * digits if used incorrectly.
271 			 */
272 
273 			len = 2;
274 			if ((c == 'k' || c == 'l') &&
275 			    isblank_l((unsigned char)*buf, locale)) {
276 				buf++;
277 				len = 1;
278 			}
279 
280 			if (!isdigit_l((unsigned char)*buf, locale))
281 				return (NULL);
282 
283 			for (i = 0; len && *buf != 0 &&
284 			     isdigit_l((unsigned char)*buf, locale); buf++) {
285 				i *= 10;
286 				i += *buf - '0';
287 				len--;
288 			}
289 			if (c == 'H' || c == 'k') {
290 				if (i > 23)
291 					return (NULL);
292 			} else if (i == 0 || i > 12)
293 				return (NULL);
294 
295 			tm->tm_hour = i;
296 
297 			break;
298 
299 		case 'p':
300 			/*
301 			 * XXX This is bogus if parsed before hour-related
302 			 * specifiers.
303 			 */
304 			if (tm->tm_hour > 12)
305 				return (NULL);
306 
307 			len = strlen(tptr->am);
308 			if (strncasecmp_l(buf, tptr->am, len, locale) == 0) {
309 				if (tm->tm_hour == 12)
310 					tm->tm_hour = 0;
311 				buf += len;
312 				break;
313 			}
314 
315 			len = strlen(tptr->pm);
316 			if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) {
317 				if (tm->tm_hour != 12)
318 					tm->tm_hour += 12;
319 				buf += len;
320 				break;
321 			}
322 
323 			return (NULL);
324 
325 		case 'A':
326 		case 'a':
327 			for (i = 0; i < asizeof(tptr->weekday); i++) {
328 				len = strlen(tptr->weekday[i]);
329 				if (strncasecmp_l(buf, tptr->weekday[i],
330 						len, locale) == 0)
331 					break;
332 				len = strlen(tptr->wday[i]);
333 				if (strncasecmp_l(buf, tptr->wday[i],
334 						len, locale) == 0)
335 					break;
336 			}
337 			if (i == asizeof(tptr->weekday))
338 				return (NULL);
339 
340 			buf += len;
341 			tm->tm_wday = i;
342 			flags |= FLAG_WDAY;
343 			break;
344 
345 		case 'U':
346 		case 'W':
347 			/*
348 			 * XXX This is bogus, as we can not assume any valid
349 			 * information present in the tm structure at this
350 			 * point to calculate a real value, so just check the
351 			 * range for now.
352 			 */
353 			if (!isdigit_l((unsigned char)*buf, locale))
354 				return (NULL);
355 
356 			len = 2;
357 			for (i = 0; len && *buf != 0 &&
358 			     isdigit_l((unsigned char)*buf, locale); buf++) {
359 				i *= 10;
360 				i += *buf - '0';
361 				len--;
362 			}
363 			if (i > 53)
364 				return (NULL);
365 
366 			if (c == 'U')
367 				day_offset = TM_SUNDAY;
368 			else
369 				day_offset = TM_MONDAY;
370 
371 
372 			week_offset = i;
373 
374 			break;
375 
376 		case 'u':
377 		case 'w':
378 			if (!isdigit_l((unsigned char)*buf, locale))
379 				return (NULL);
380 
381 			i = *buf++ - '0';
382 			if (i < 0 || i > 7 || (c == 'u' && i < 1) ||
383 			    (c == 'w' && i > 6))
384 				return (NULL);
385 
386 			tm->tm_wday = i % 7;
387 			flags |= FLAG_WDAY;
388 
389 			break;
390 
391 		case 'e':
392 			/*
393 			 * With %e format, our strftime(3) adds a blank space
394 			 * before single digits.
395 			 */
396 			if (*buf != 0 &&
397 			    isspace_l((unsigned char)*buf, locale))
398 			       buf++;
399 			/* FALLTHROUGH */
400 		case 'd':
401 			/*
402 			 * The %e specifier was once explicitly documented as
403 			 * not being zero-padded but was later changed to
404 			 * equivalent to %d.  There is no harm in allowing
405 			 * such padding.
406 			 *
407 			 * XXX The %e specifier may gobble one too many
408 			 * digits if used incorrectly.
409 			 */
410 			if (!isdigit_l((unsigned char)*buf, locale))
411 				return (NULL);
412 
413 			len = 2;
414 			for (i = 0; len && *buf != 0 &&
415 			     isdigit_l((unsigned char)*buf, locale); buf++) {
416 				i *= 10;
417 				i += *buf - '0';
418 				len--;
419 			}
420 			if (i == 0 || i > 31)
421 				return (NULL);
422 
423 			tm->tm_mday = i;
424 			flags |= FLAG_MDAY;
425 
426 			break;
427 
428 		case 'B':
429 		case 'b':
430 		case 'h':
431 			for (i = 0; i < asizeof(tptr->month); i++) {
432 				if (Oalternative) {
433 					if (c == 'B') {
434 						len = strlen(tptr->alt_month[i]);
435 						if (strncasecmp_l(buf,
436 								tptr->alt_month[i],
437 								len, locale) == 0)
438 							break;
439 					}
440 				} else {
441 					len = strlen(tptr->month[i]);
442 					if (strncasecmp_l(buf, tptr->month[i],
443 							len, locale) == 0)
444 						break;
445 				}
446 			}
447 			/*
448 			 * Try the abbreviated month name if the full name
449 			 * wasn't found and Oalternative was not requested.
450 			 */
451 			if (i == asizeof(tptr->month) && !Oalternative) {
452 				for (i = 0; i < asizeof(tptr->month); i++) {
453 					len = strlen(tptr->mon[i]);
454 					if (strncasecmp_l(buf, tptr->mon[i],
455 							len, locale) == 0)
456 						break;
457 				}
458 			}
459 			if (i == asizeof(tptr->month))
460 				return (NULL);
461 
462 			tm->tm_mon = i;
463 			buf += len;
464 			flags |= FLAG_MONTH;
465 
466 			break;
467 
468 		case 'm':
469 			if (!isdigit_l((unsigned char)*buf, locale))
470 				return (NULL);
471 
472 			len = 2;
473 			for (i = 0; len && *buf != 0 &&
474 			     isdigit_l((unsigned char)*buf, locale); buf++) {
475 				i *= 10;
476 				i += *buf - '0';
477 				len--;
478 			}
479 			if (i < 1 || i > 12)
480 				return (NULL);
481 
482 			tm->tm_mon = i - 1;
483 			flags |= FLAG_MONTH;
484 
485 			break;
486 
487 		case 's':
488 			{
489 			char *cp;
490 			int sverrno;
491 			long n;
492 			time_t t;
493 
494 			sverrno = errno;
495 			errno = 0;
496 			n = strtol_l(buf, &cp, 10, locale);
497 			if (errno == ERANGE || (long)(t = n) != n) {
498 				errno = sverrno;
499 				return (NULL);
500 			}
501 			errno = sverrno;
502 			buf = cp;
503 			if (gmtime_r(&t, tm) == NULL)
504 				return (NULL);
505 			*GMTp = 1;
506 			flags |= FLAG_YDAY | FLAG_WDAY | FLAG_MONTH |
507 			    FLAG_MDAY | FLAG_YEAR;
508 			}
509 			break;
510 
511 		case 'Y':
512 		case 'y':
513 			if (*buf == 0 ||
514 			    isspace_l((unsigned char)*buf, locale))
515 				break;
516 
517 			if (!isdigit_l((unsigned char)*buf, locale))
518 				return (NULL);
519 
520 			len = (c == 'Y') ? 4 : 2;
521 			for (i = 0; len && *buf != 0 &&
522 			     isdigit_l((unsigned char)*buf, locale); buf++) {
523 				i *= 10;
524 				i += *buf - '0';
525 				len--;
526 			}
527 			if (c == 'Y')
528 				century = i / 100;
529 			year = i % 100;
530 
531 			flags |= FLAG_YEAR;
532 
533 			break;
534 
535 		case 'Z':
536 			{
537 			const char *cp;
538 			char *zonestr;
539 
540 			for (cp = buf; *cp &&
541 			     isupper_l((unsigned char)*cp, locale); ++cp) {
542 				/*empty*/}
543 			if (cp - buf) {
544 				zonestr = alloca(cp - buf + 1);
545 				strncpy(zonestr, buf, cp - buf);
546 				zonestr[cp - buf] = '\0';
547 				tzset();
548 				if (0 == strcmp(zonestr, "GMT") ||
549 				    0 == strcmp(zonestr, "UTC")) {
550 				    *GMTp = 1;
551 				} else if (0 == strcmp(zonestr, tzname[0])) {
552 				    tm->tm_isdst = 0;
553 				} else if (0 == strcmp(zonestr, tzname[1])) {
554 				    tm->tm_isdst = 1;
555 				} else {
556 				    return (NULL);
557 				}
558 				buf += cp - buf;
559 			}
560 			}
561 			break;
562 
563 		case 'z':
564 			{
565 			int sign = 1;
566 
567 			if (*buf != '+') {
568 				if (*buf == '-')
569 					sign = -1;
570 				else
571 					return (NULL);
572 			}
573 
574 			buf++;
575 			i = 0;
576 			for (len = 4; len > 0; len--) {
577 				if (isdigit_l((unsigned char)*buf, locale)) {
578 					i *= 10;
579 					i += *buf - '0';
580 					buf++;
581 				} else if (len == 2) {
582 					i *= 100;
583 					break;
584 				} else
585 					return (NULL);
586 			}
587 
588 			if (i > 1400 || (sign == -1 && i > 1200) ||
589 			    (i % 100) >= 60)
590 				return (NULL);
591 			tm->tm_hour -= sign * (i / 100);
592 			tm->tm_min  -= sign * (i % 100);
593 			*GMTp = 1;
594 			}
595 			break;
596 
597 		case 'n':
598 		case 't':
599 			while (isspace_l((unsigned char)*buf, locale))
600 				buf++;
601 			break;
602 
603 		default:
604 			return (NULL);
605 		}
606 	}
607 
608 	if (century != -1 || year != -1) {
609 		if (year == -1)
610 			year = 0;
611 		if (century == -1) {
612 			if (year < 69)
613 				year += 100;
614 		} else
615 			year += century * 100 - TM_YEAR_BASE;
616 		tm->tm_year = year;
617 	}
618 
619 	if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) {
620 		if ((flags & (FLAG_MONTH | FLAG_MDAY)) ==
621 		    (FLAG_MONTH | FLAG_MDAY)) {
622 			tm->tm_yday = start_of_month[isleap(tm->tm_year +
623 			    TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
624 			flags |= FLAG_YDAY;
625 		} else if (day_offset != -1) {
626 			int tmpwday, tmpyday, fwo;
627 
628 			fwo = first_wday_of(tm->tm_year + TM_YEAR_BASE);
629 			/* No incomplete week (week 0). */
630 			if (week_offset == 0 && fwo == day_offset)
631 				return (NULL);
632 
633 			/* Set the date to the first Sunday (or Monday)
634 			 * of the specified week of the year.
635 			 */
636 			tmpwday = (flags & FLAG_WDAY) ? tm->tm_wday :
637 			    day_offset;
638 			tmpyday = (7 - fwo + day_offset) % 7 +
639 			    (week_offset - 1) * 7 +
640 			    (tmpwday - day_offset + 7) % 7;
641 			/* Impossible yday for incomplete week (week 0). */
642 			if (tmpyday < 0) {
643 				if (flags & FLAG_WDAY)
644 					return (NULL);
645 				tmpyday = 0;
646 			}
647 			tm->tm_yday = tmpyday;
648 			flags |= FLAG_YDAY;
649 		}
650 	}
651 
652 	if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) {
653 		if (!(flags & FLAG_MONTH)) {
654 			i = 0;
655 			while (tm->tm_yday >=
656 			    start_of_month[isleap(tm->tm_year +
657 			    TM_YEAR_BASE)][i])
658 				i++;
659 			if (i > 12) {
660 				i = 1;
661 				tm->tm_yday -=
662 				    start_of_month[isleap(tm->tm_year +
663 				    TM_YEAR_BASE)][12];
664 				tm->tm_year++;
665 			}
666 			tm->tm_mon = i - 1;
667 			flags |= FLAG_MONTH;
668 		}
669 		if (!(flags & FLAG_MDAY)) {
670 			tm->tm_mday = tm->tm_yday -
671 			    start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)]
672 			    [tm->tm_mon] + 1;
673 			flags |= FLAG_MDAY;
674 		}
675 		if (!(flags & FLAG_WDAY)) {
676 			wday_offset = first_wday_of(tm->tm_year + TM_YEAR_BASE);
677 			tm->tm_wday = (wday_offset + tm->tm_yday) % 7;
678 			flags |= FLAG_WDAY;
679 		}
680 	}
681 
682 	return ((char *)buf);
683 }
684 
685 char *
686 strptime_l(const char * __restrict buf, const char * __restrict fmt,
687     struct tm * __restrict tm, locale_t loc)
688 {
689 	char *ret;
690 	int gmt;
691 	FIX_LOCALE(loc);
692 
693 	gmt = 0;
694 	ret = _strptime(buf, fmt, tm, &gmt, loc);
695 	if (ret && gmt) {
696 		time_t t = timegm(tm);
697 
698 		localtime_r(&t, tm);
699 	}
700 
701 	return (ret);
702 }
703 
704 char *
705 strptime(const char * __restrict buf, const char * __restrict fmt,
706     struct tm * __restrict tm)
707 {
708 	return strptime_l(buf, fmt, tm, __get_locale());
709 }
710