xref: /freebsd/lib/libc/stdtime/strptime.c (revision 4d846d260e2b9a3d4d0a701462568268cbfe7a5b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2014 Gary Mills
5  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
6  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  *
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer
20  *    in the documentation and/or other materials provided with the
21  *    distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
24  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
32  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * The views and conclusions contained in the software and documentation
36  * are those of the authors and should not be interpreted as representing
37  * official policies, either expressed or implied, of Powerdog Industries.
38  */
39 
40 #include <sys/cdefs.h>
41 #ifndef lint
42 #ifndef NOID
43 static char copyright[] __unused =
44 "@(#) Copyright (c) 1994 Powerdog Industries.  All rights reserved.";
45 static char sccsid[] __unused = "@(#)strptime.c	0.1 (Powerdog) 94/03/27";
46 #endif /* !defined NOID */
47 #endif /* not lint */
48 __FBSDID("$FreeBSD$");
49 
50 #include "namespace.h"
51 #include <time.h>
52 #include <ctype.h>
53 #include <errno.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <pthread.h>
57 #include "private.h"
58 #include "un-namespace.h"
59 #include "libc_private.h"
60 #include "timelocal.h"
61 #include "tzfile.h"
62 
63 static char * _strptime(const char *, const char *, struct tm *, int *, locale_t);
64 
/* Element count of a true array (must not be applied to a pointer). */
#define	asizeof(a)	(sizeof(a) / sizeof((a)[0]))

/*
 * Bit masks accumulated in _strptime()'s `flags' variable to record which
 * calendar fields have been obtained so far.  FLAG_NONE is the initial
 * value of `flags' and is itself a (non-zero) bit.
 */
#define	FLAG_NONE	0x01	/* initial value, nothing parsed yet */
#define	FLAG_YEAR	0x02	/* year (or century) known */
#define	FLAG_MONTH	0x04	/* tm_mon known */
#define	FLAG_YDAY	0x08	/* tm_yday known */
#define	FLAG_MDAY	0x10	/* tm_mday known */
#define	FLAG_WDAY	0x20	/* tm_wday known */
73 
/*
 * Return the week day (in tm_wday terms) on which January 1 of `year'
 * falls.  `year' is the full calendar year, not a years-since-1900
 * offset.  The formula is valid for the Gregorian calendar, which began
 * Sept 14, 1752 in the UK and its colonies. Ref:
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */

static int
first_wday_of(int year)
{
	int century_term, leap_term, yy;

	/* Two-digit year within the century. */
	yy = year % 100;
	/* Contribution of the century, which repeats every 400 years. */
	century_term = 2 * (3 - (year / 100) % 4);
	/* Leap years are pulled back by one day (6 == -1 modulo 7). */
	leap_term = isleap(year) ? 6 : 0;

	return ((century_term + yy + (yy / 4) + leap_term + 1) % 7);
}
87 
88 static char *
89 _strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp,
90 		locale_t locale)
91 {
92 	char	c;
93 	const char *ptr;
94 	int	day_offset = -1, wday_offset;
95 	int week_offset;
96 	int	i, len;
97 	int flags;
98 	int Ealternative, Oalternative;
99 	int century, year;
100 	const struct lc_time_T *tptr = __get_current_time_locale(locale);
101 	static int start_of_month[2][13] = {
102 		{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
103 		{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
104 	};
105 
106 	flags = FLAG_NONE;
107 	century = -1;
108 	year = -1;
109 
110 	ptr = fmt;
111 	while (*ptr != 0) {
112 		c = *ptr++;
113 
114 		if (c != '%') {
115 			if (isspace_l((unsigned char)c, locale))
116 				while (*buf != 0 &&
117 				       isspace_l((unsigned char)*buf, locale))
118 					buf++;
119 			else if (c != *buf++)
120 				return (NULL);
121 			continue;
122 		}
123 
124 		Ealternative = 0;
125 		Oalternative = 0;
126 label:
127 		c = *ptr++;
128 		switch (c) {
129 		case '%':
130 			if (*buf++ != '%')
131 				return (NULL);
132 			break;
133 
134 		case '+':
135 			buf = _strptime(buf, tptr->date_fmt, tm, GMTp, locale);
136 			if (buf == NULL)
137 				return (NULL);
138 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
139 			break;
140 
141 		case 'C':
142 			if (!isdigit_l((unsigned char)*buf, locale))
143 				return (NULL);
144 
145 			/* XXX This will break for 3-digit centuries. */
146 			len = 2;
147 			for (i = 0; len && *buf != 0 &&
148 			     isdigit_l((unsigned char)*buf, locale); buf++) {
149 				i *= 10;
150 				i += *buf - '0';
151 				len--;
152 			}
153 
154 			century = i;
155 			flags |= FLAG_YEAR;
156 
157 			break;
158 
159 		case 'c':
160 			buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale);
161 			if (buf == NULL)
162 				return (NULL);
163 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
164 			break;
165 
166 		case 'D':
167 			buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale);
168 			if (buf == NULL)
169 				return (NULL);
170 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
171 			break;
172 
173 		case 'E':
174 			if (Ealternative || Oalternative)
175 				break;
176 			Ealternative++;
177 			goto label;
178 
179 		case 'O':
180 			if (Ealternative || Oalternative)
181 				break;
182 			Oalternative++;
183 			goto label;
184 
185 		case 'F':
186 			buf = _strptime(buf, "%Y-%m-%d", tm, GMTp, locale);
187 			if (buf == NULL)
188 				return (NULL);
189 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
190 			break;
191 
192 		case 'R':
193 			buf = _strptime(buf, "%H:%M", tm, GMTp, locale);
194 			if (buf == NULL)
195 				return (NULL);
196 			break;
197 
198 		case 'r':
199 			buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale);
200 			if (buf == NULL)
201 				return (NULL);
202 			break;
203 
204 		case 'T':
205 			buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale);
206 			if (buf == NULL)
207 				return (NULL);
208 			break;
209 
210 		case 'X':
211 			buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale);
212 			if (buf == NULL)
213 				return (NULL);
214 			break;
215 
216 		case 'x':
217 			buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale);
218 			if (buf == NULL)
219 				return (NULL);
220 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
221 			break;
222 
223 		case 'j':
224 			if (!isdigit_l((unsigned char)*buf, locale))
225 				return (NULL);
226 
227 			len = 3;
228 			for (i = 0; len && *buf != 0 &&
229 			     isdigit_l((unsigned char)*buf, locale); buf++){
230 				i *= 10;
231 				i += *buf - '0';
232 				len--;
233 			}
234 			if (i < 1 || i > 366)
235 				return (NULL);
236 
237 			tm->tm_yday = i - 1;
238 			flags |= FLAG_YDAY;
239 
240 			break;
241 
242 		case 'M':
243 		case 'S':
244 			if (*buf == 0 ||
245 				isspace_l((unsigned char)*buf, locale))
246 				break;
247 
248 			if (!isdigit_l((unsigned char)*buf, locale))
249 				return (NULL);
250 
251 			len = 2;
252 			for (i = 0; len && *buf != 0 &&
253 				isdigit_l((unsigned char)*buf, locale); buf++){
254 				i *= 10;
255 				i += *buf - '0';
256 				len--;
257 			}
258 
259 			if (c == 'M') {
260 				if (i > 59)
261 					return (NULL);
262 				tm->tm_min = i;
263 			} else {
264 				if (i > 60)
265 					return (NULL);
266 				tm->tm_sec = i;
267 			}
268 
269 			break;
270 
271 		case 'H':
272 		case 'I':
273 		case 'k':
274 		case 'l':
275 			/*
276 			 * %k and %l specifiers are documented as being
277 			 * blank-padded.  However, there is no harm in
278 			 * allowing zero-padding.
279 			 *
280 			 * XXX %k and %l specifiers may gobble one too many
281 			 * digits if used incorrectly.
282 			 */
283 
284 			len = 2;
285 			if ((c == 'k' || c == 'l') &&
286 			    isblank_l((unsigned char)*buf, locale)) {
287 				buf++;
288 				len = 1;
289 			}
290 
291 			if (!isdigit_l((unsigned char)*buf, locale))
292 				return (NULL);
293 
294 			for (i = 0; len && *buf != 0 &&
295 			     isdigit_l((unsigned char)*buf, locale); buf++) {
296 				i *= 10;
297 				i += *buf - '0';
298 				len--;
299 			}
300 			if (c == 'H' || c == 'k') {
301 				if (i > 23)
302 					return (NULL);
303 			} else if (i == 0 || i > 12)
304 				return (NULL);
305 
306 			tm->tm_hour = i;
307 
308 			break;
309 
310 		case 'p':
311 			/*
312 			 * XXX This is bogus if parsed before hour-related
313 			 * specifiers.
314 			 */
315 			if (tm->tm_hour > 12)
316 				return (NULL);
317 
318 			len = strlen(tptr->am);
319 			if (strncasecmp_l(buf, tptr->am, len, locale) == 0) {
320 				if (tm->tm_hour == 12)
321 					tm->tm_hour = 0;
322 				buf += len;
323 				break;
324 			}
325 
326 			len = strlen(tptr->pm);
327 			if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) {
328 				if (tm->tm_hour != 12)
329 					tm->tm_hour += 12;
330 				buf += len;
331 				break;
332 			}
333 
334 			return (NULL);
335 
336 		case 'A':
337 		case 'a':
338 			for (i = 0; i < asizeof(tptr->weekday); i++) {
339 				len = strlen(tptr->weekday[i]);
340 				if (strncasecmp_l(buf, tptr->weekday[i],
341 						len, locale) == 0)
342 					break;
343 				len = strlen(tptr->wday[i]);
344 				if (strncasecmp_l(buf, tptr->wday[i],
345 						len, locale) == 0)
346 					break;
347 			}
348 			if (i == asizeof(tptr->weekday))
349 				return (NULL);
350 
351 			buf += len;
352 			tm->tm_wday = i;
353 			flags |= FLAG_WDAY;
354 			break;
355 
356 		case 'U':
357 		case 'W':
358 			/*
359 			 * XXX This is bogus, as we can not assume any valid
360 			 * information present in the tm structure at this
361 			 * point to calculate a real value, so just check the
362 			 * range for now.
363 			 */
364 			if (!isdigit_l((unsigned char)*buf, locale))
365 				return (NULL);
366 
367 			len = 2;
368 			for (i = 0; len && *buf != 0 &&
369 			     isdigit_l((unsigned char)*buf, locale); buf++) {
370 				i *= 10;
371 				i += *buf - '0';
372 				len--;
373 			}
374 			if (i > 53)
375 				return (NULL);
376 
377 			if (c == 'U')
378 				day_offset = TM_SUNDAY;
379 			else
380 				day_offset = TM_MONDAY;
381 
382 
383 			week_offset = i;
384 
385 			break;
386 
387 		case 'u':
388 		case 'w':
389 			if (!isdigit_l((unsigned char)*buf, locale))
390 				return (NULL);
391 
392 			i = *buf++ - '0';
393 			if (i < 0 || i > 7 || (c == 'u' && i < 1) ||
394 			    (c == 'w' && i > 6))
395 				return (NULL);
396 
397 			tm->tm_wday = i % 7;
398 			flags |= FLAG_WDAY;
399 
400 			break;
401 
402 		case 'e':
403 			/*
404 			 * With %e format, our strftime(3) adds a blank space
405 			 * before single digits.
406 			 */
407 			if (*buf != 0 &&
408 			    isspace_l((unsigned char)*buf, locale))
409 			       buf++;
410 			/* FALLTHROUGH */
411 		case 'd':
412 			/*
413 			 * The %e specifier was once explicitly documented as
414 			 * not being zero-padded but was later changed to
415 			 * equivalent to %d.  There is no harm in allowing
416 			 * such padding.
417 			 *
418 			 * XXX The %e specifier may gobble one too many
419 			 * digits if used incorrectly.
420 			 */
421 			if (!isdigit_l((unsigned char)*buf, locale))
422 				return (NULL);
423 
424 			len = 2;
425 			for (i = 0; len && *buf != 0 &&
426 			     isdigit_l((unsigned char)*buf, locale); buf++) {
427 				i *= 10;
428 				i += *buf - '0';
429 				len--;
430 			}
431 			if (i == 0 || i > 31)
432 				return (NULL);
433 
434 			tm->tm_mday = i;
435 			flags |= FLAG_MDAY;
436 
437 			break;
438 
439 		case 'B':
440 		case 'b':
441 		case 'h':
442 			for (i = 0; i < asizeof(tptr->month); i++) {
443 				if (Oalternative) {
444 					if (c == 'B') {
445 						len = strlen(tptr->alt_month[i]);
446 						if (strncasecmp_l(buf,
447 								tptr->alt_month[i],
448 								len, locale) == 0)
449 							break;
450 					}
451 				} else {
452 					len = strlen(tptr->month[i]);
453 					if (strncasecmp_l(buf, tptr->month[i],
454 							len, locale) == 0)
455 						break;
456 				}
457 			}
458 			/*
459 			 * Try the abbreviated month name if the full name
460 			 * wasn't found and Oalternative was not requested.
461 			 */
462 			if (i == asizeof(tptr->month) && !Oalternative) {
463 				for (i = 0; i < asizeof(tptr->month); i++) {
464 					len = strlen(tptr->mon[i]);
465 					if (strncasecmp_l(buf, tptr->mon[i],
466 							len, locale) == 0)
467 						break;
468 				}
469 			}
470 			if (i == asizeof(tptr->month))
471 				return (NULL);
472 
473 			tm->tm_mon = i;
474 			buf += len;
475 			flags |= FLAG_MONTH;
476 
477 			break;
478 
479 		case 'm':
480 			if (!isdigit_l((unsigned char)*buf, locale))
481 				return (NULL);
482 
483 			len = 2;
484 			for (i = 0; len && *buf != 0 &&
485 			     isdigit_l((unsigned char)*buf, locale); buf++) {
486 				i *= 10;
487 				i += *buf - '0';
488 				len--;
489 			}
490 			if (i < 1 || i > 12)
491 				return (NULL);
492 
493 			tm->tm_mon = i - 1;
494 			flags |= FLAG_MONTH;
495 
496 			break;
497 
498 		case 's':
499 			{
500 			char *cp;
501 			int sverrno;
502 			long n;
503 			time_t t;
504 
505 			sverrno = errno;
506 			errno = 0;
507 			n = strtol_l(buf, &cp, 10, locale);
508 			if (errno == ERANGE || (long)(t = n) != n) {
509 				errno = sverrno;
510 				return (NULL);
511 			}
512 			errno = sverrno;
513 			buf = cp;
514 			if (gmtime_r(&t, tm) == NULL)
515 				return (NULL);
516 			*GMTp = 1;
517 			flags |= FLAG_YDAY | FLAG_WDAY | FLAG_MONTH |
518 			    FLAG_MDAY | FLAG_YEAR;
519 			}
520 			break;
521 
522 		case 'Y':
523 		case 'y':
524 			if (*buf == 0 ||
525 			    isspace_l((unsigned char)*buf, locale))
526 				break;
527 
528 			if (!isdigit_l((unsigned char)*buf, locale))
529 				return (NULL);
530 
531 			len = (c == 'Y') ? 4 : 2;
532 			for (i = 0; len && *buf != 0 &&
533 			     isdigit_l((unsigned char)*buf, locale); buf++) {
534 				i *= 10;
535 				i += *buf - '0';
536 				len--;
537 			}
538 			if (c == 'Y')
539 				century = i / 100;
540 			year = i % 100;
541 
542 			flags |= FLAG_YEAR;
543 
544 			break;
545 
546 		case 'Z':
547 			{
548 			const char *cp;
549 			char *zonestr;
550 
551 			for (cp = buf; *cp &&
552 			     isupper_l((unsigned char)*cp, locale); ++cp) {
553 				/*empty*/}
554 			if (cp - buf) {
555 				zonestr = alloca(cp - buf + 1);
556 				strncpy(zonestr, buf, cp - buf);
557 				zonestr[cp - buf] = '\0';
558 				tzset();
559 				if (0 == strcmp(zonestr, "GMT") ||
560 				    0 == strcmp(zonestr, "UTC")) {
561 				    *GMTp = 1;
562 				} else if (0 == strcmp(zonestr, tzname[0])) {
563 				    tm->tm_isdst = 0;
564 				} else if (0 == strcmp(zonestr, tzname[1])) {
565 				    tm->tm_isdst = 1;
566 				} else {
567 				    return (NULL);
568 				}
569 				buf += cp - buf;
570 			}
571 			}
572 			break;
573 
574 		case 'z':
575 			{
576 			int sign = 1;
577 
578 			if (*buf != '+') {
579 				if (*buf == '-')
580 					sign = -1;
581 				else
582 					return (NULL);
583 			}
584 
585 			buf++;
586 			i = 0;
587 			for (len = 4; len > 0; len--) {
588 				if (isdigit_l((unsigned char)*buf, locale)) {
589 					i *= 10;
590 					i += *buf - '0';
591 					buf++;
592 				} else if (len == 2) {
593 					i *= 100;
594 					break;
595 				} else
596 					return (NULL);
597 			}
598 
599 			if (i > 1400 || (sign == -1 && i > 1200) ||
600 			    (i % 100) >= 60)
601 				return (NULL);
602 			tm->tm_hour -= sign * (i / 100);
603 			tm->tm_min  -= sign * (i % 100);
604 			*GMTp = 1;
605 			}
606 			break;
607 
608 		case 'n':
609 		case 't':
610 			while (isspace_l((unsigned char)*buf, locale))
611 				buf++;
612 			break;
613 
614 		default:
615 			return (NULL);
616 		}
617 	}
618 
619 	if (century != -1 || year != -1) {
620 		if (year == -1)
621 			year = 0;
622 		if (century == -1) {
623 			if (year < 69)
624 				year += 100;
625 		} else
626 			year += century * 100 - TM_YEAR_BASE;
627 		tm->tm_year = year;
628 	}
629 
630 	if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) {
631 		if ((flags & (FLAG_MONTH | FLAG_MDAY)) ==
632 		    (FLAG_MONTH | FLAG_MDAY)) {
633 			tm->tm_yday = start_of_month[isleap(tm->tm_year +
634 			    TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
635 			flags |= FLAG_YDAY;
636 		} else if (day_offset != -1) {
637 			int tmpwday, tmpyday, fwo;
638 
639 			fwo = first_wday_of(tm->tm_year + TM_YEAR_BASE);
640 			/* No incomplete week (week 0). */
641 			if (week_offset == 0 && fwo == day_offset)
642 				return (NULL);
643 
644 			/* Set the date to the first Sunday (or Monday)
645 			 * of the specified week of the year.
646 			 */
647 			tmpwday = (flags & FLAG_WDAY) ? tm->tm_wday :
648 			    day_offset;
649 			tmpyday = (7 - fwo + day_offset) % 7 +
650 			    (week_offset - 1) * 7 +
651 			    (tmpwday - day_offset + 7) % 7;
652 			/* Impossible yday for incomplete week (week 0). */
653 			if (tmpyday < 0) {
654 				if (flags & FLAG_WDAY)
655 					return (NULL);
656 				tmpyday = 0;
657 			}
658 			tm->tm_yday = tmpyday;
659 			flags |= FLAG_YDAY;
660 		}
661 	}
662 
663 	if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) {
664 		if (!(flags & FLAG_MONTH)) {
665 			i = 0;
666 			while (tm->tm_yday >=
667 			    start_of_month[isleap(tm->tm_year +
668 			    TM_YEAR_BASE)][i])
669 				i++;
670 			if (i > 12) {
671 				i = 1;
672 				tm->tm_yday -=
673 				    start_of_month[isleap(tm->tm_year +
674 				    TM_YEAR_BASE)][12];
675 				tm->tm_year++;
676 			}
677 			tm->tm_mon = i - 1;
678 			flags |= FLAG_MONTH;
679 		}
680 		if (!(flags & FLAG_MDAY)) {
681 			tm->tm_mday = tm->tm_yday -
682 			    start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)]
683 			    [tm->tm_mon] + 1;
684 			flags |= FLAG_MDAY;
685 		}
686 		if (!(flags & FLAG_WDAY)) {
687 			i = 0;
688 			wday_offset = first_wday_of(tm->tm_year);
689 			while (i++ <= tm->tm_yday) {
690 				if (wday_offset++ >= 6)
691 					wday_offset = 0;
692 			}
693 			tm->tm_wday = wday_offset;
694 			flags |= FLAG_WDAY;
695 		}
696 	}
697 
698 	return ((char *)buf);
699 }
700 
701 char *
702 strptime_l(const char * __restrict buf, const char * __restrict fmt,
703     struct tm * __restrict tm, locale_t loc)
704 {
705 	char *ret;
706 	int gmt;
707 	FIX_LOCALE(loc);
708 
709 	gmt = 0;
710 	ret = _strptime(buf, fmt, tm, &gmt, loc);
711 	if (ret && gmt) {
712 		time_t t = timegm(tm);
713 
714 		localtime_r(&t, tm);
715 	}
716 
717 	return (ret);
718 }
719 
720 char *
721 strptime(const char * __restrict buf, const char * __restrict fmt,
722     struct tm * __restrict tm)
723 {
724 	return strptime_l(buf, fmt, tm, __get_locale());
725 }
726