xref: /freebsd/lib/libc/stdtime/strptime.c (revision 058ac3e8063366dafa634d9107642e12b038bf09)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2014 Gary Mills
5  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
6  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  *
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer
20  *    in the documentation and/or other materials provided with the
21  *    distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
24  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
32  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * The views and conclusions contained in the software and documentation
36  * are those of the authors and should not be interpreted as representing
37  * official policies, either expressed or implied, of Powerdog Industries.
38  */
39 
40 #include <sys/cdefs.h>
41 #ifndef lint
42 #ifndef NOID
43 static char copyright[] __unused =
44 "@(#) Copyright (c) 1994 Powerdog Industries.  All rights reserved.";
45 static char sccsid[] __unused = "@(#)strptime.c	0.1 (Powerdog) 94/03/27";
46 #endif /* !defined NOID */
47 #endif /* not lint */
48 __FBSDID("$FreeBSD$");
49 
50 #include "namespace.h"
51 #include <time.h>
52 #include <ctype.h>
53 #include <errno.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <pthread.h>
57 #include "un-namespace.h"
58 #include "libc_private.h"
59 #include "timelocal.h"
60 #include "tzfile.h"
61 
62 static char * _strptime(const char *, const char *, struct tm *, int *, locale_t);
63 
64 #define	asizeof(a)	(sizeof(a) / sizeof((a)[0]))
65 
66 #define	FLAG_NONE	(1 << 0)
67 #define	FLAG_YEAR	(1 << 1)
68 #define	FLAG_MONTH	(1 << 2)
69 #define	FLAG_YDAY	(1 << 3)
70 #define	FLAG_MDAY	(1 << 4)
71 #define	FLAG_WDAY	(1 << 5)
72 
/*
 * Return the day of the week (0 = Sunday ... 6 = Saturday) on which
 * January 1st of the given calendar year falls.  `year' is the full
 * year (e.g. 2024), not an offset from TM_YEAR_BASE.
 *
 * The formula is only valid for the Gregorian calendar, which began
 * Sept 14, 1752 in the UK and its colonies. Ref:
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */

static int
first_wday_of(int year)
{
	int century_term, leap_term, yy;

	/* Two-digit year within the century. */
	yy = year % 100;
	/* Contribution of the century, repeating every 400 years. */
	century_term = 2 * (3 - (year / 100) % 4);
	/* January 1st of a leap year falls one day earlier (-1 == +6 mod 7). */
	leap_term = isleap(year) ? 6 : 0;

	return ((century_term + yy + (yy / 4) + leap_term + 1) % 7);
}
86 
87 static char *
88 _strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp,
89 		locale_t locale)
90 {
91 	char	c;
92 	const char *ptr;
93 	int	day_offset = -1, wday_offset;
94 	int week_offset;
95 	int	i, len;
96 	int flags;
97 	int Ealternative, Oalternative;
98 	int century, year;
99 	const struct lc_time_T *tptr = __get_current_time_locale(locale);
100 	static int start_of_month[2][13] = {
101 		{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
102 		{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
103 	};
104 
105 	flags = FLAG_NONE;
106 	century = -1;
107 	year = -1;
108 
109 	ptr = fmt;
110 	while (*ptr != 0) {
111 		c = *ptr++;
112 
113 		if (c != '%') {
114 			if (isspace_l((unsigned char)c, locale))
115 				while (*buf != 0 &&
116 				       isspace_l((unsigned char)*buf, locale))
117 					buf++;
118 			else if (c != *buf++)
119 				return (NULL);
120 			continue;
121 		}
122 
123 		Ealternative = 0;
124 		Oalternative = 0;
125 label:
126 		c = *ptr++;
127 		switch (c) {
128 		case '%':
129 			if (*buf++ != '%')
130 				return (NULL);
131 			break;
132 
133 		case '+':
134 			buf = _strptime(buf, tptr->date_fmt, tm, GMTp, locale);
135 			if (buf == NULL)
136 				return (NULL);
137 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
138 			break;
139 
140 		case 'C':
141 			if (!isdigit_l((unsigned char)*buf, locale))
142 				return (NULL);
143 
144 			/* XXX This will break for 3-digit centuries. */
145 			len = 2;
146 			for (i = 0; len && *buf != 0 &&
147 			     isdigit_l((unsigned char)*buf, locale); buf++) {
148 				i *= 10;
149 				i += *buf - '0';
150 				len--;
151 			}
152 
153 			century = i;
154 			flags |= FLAG_YEAR;
155 
156 			break;
157 
158 		case 'c':
159 			buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale);
160 			if (buf == NULL)
161 				return (NULL);
162 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
163 			break;
164 
165 		case 'D':
166 			buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale);
167 			if (buf == NULL)
168 				return (NULL);
169 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
170 			break;
171 
172 		case 'E':
173 			if (Ealternative || Oalternative)
174 				break;
175 			Ealternative++;
176 			goto label;
177 
178 		case 'O':
179 			if (Ealternative || Oalternative)
180 				break;
181 			Oalternative++;
182 			goto label;
183 
184 		case 'F':
185 			buf = _strptime(buf, "%Y-%m-%d", tm, GMTp, locale);
186 			if (buf == NULL)
187 				return (NULL);
188 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
189 			break;
190 
191 		case 'R':
192 			buf = _strptime(buf, "%H:%M", tm, GMTp, locale);
193 			if (buf == NULL)
194 				return (NULL);
195 			break;
196 
197 		case 'r':
198 			buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale);
199 			if (buf == NULL)
200 				return (NULL);
201 			break;
202 
203 		case 'T':
204 			buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale);
205 			if (buf == NULL)
206 				return (NULL);
207 			break;
208 
209 		case 'X':
210 			buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale);
211 			if (buf == NULL)
212 				return (NULL);
213 			break;
214 
215 		case 'x':
216 			buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale);
217 			if (buf == NULL)
218 				return (NULL);
219 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
220 			break;
221 
222 		case 'j':
223 			if (!isdigit_l((unsigned char)*buf, locale))
224 				return (NULL);
225 
226 			len = 3;
227 			for (i = 0; len && *buf != 0 &&
228 			     isdigit_l((unsigned char)*buf, locale); buf++){
229 				i *= 10;
230 				i += *buf - '0';
231 				len--;
232 			}
233 			if (i < 1 || i > 366)
234 				return (NULL);
235 
236 			tm->tm_yday = i - 1;
237 			flags |= FLAG_YDAY;
238 
239 			break;
240 
241 		case 'M':
242 		case 'S':
243 			if (*buf == 0 ||
244 				isspace_l((unsigned char)*buf, locale))
245 				break;
246 
247 			if (!isdigit_l((unsigned char)*buf, locale))
248 				return (NULL);
249 
250 			len = 2;
251 			for (i = 0; len && *buf != 0 &&
252 				isdigit_l((unsigned char)*buf, locale); buf++){
253 				i *= 10;
254 				i += *buf - '0';
255 				len--;
256 			}
257 
258 			if (c == 'M') {
259 				if (i > 59)
260 					return (NULL);
261 				tm->tm_min = i;
262 			} else {
263 				if (i > 60)
264 					return (NULL);
265 				tm->tm_sec = i;
266 			}
267 
268 			break;
269 
270 		case 'H':
271 		case 'I':
272 		case 'k':
273 		case 'l':
274 			/*
275 			 * %k and %l specifiers are documented as being
276 			 * blank-padded.  However, there is no harm in
277 			 * allowing zero-padding.
278 			 *
279 			 * XXX %k and %l specifiers may gobble one too many
280 			 * digits if used incorrectly.
281 			 */
282 
283 			len = 2;
284 			if ((c == 'k' || c == 'l') &&
285 			    isblank_l((unsigned char)*buf, locale)) {
286 				buf++;
287 				len = 1;
288 			}
289 
290 			if (!isdigit_l((unsigned char)*buf, locale))
291 				return (NULL);
292 
293 			for (i = 0; len && *buf != 0 &&
294 			     isdigit_l((unsigned char)*buf, locale); buf++) {
295 				i *= 10;
296 				i += *buf - '0';
297 				len--;
298 			}
299 			if (c == 'H' || c == 'k') {
300 				if (i > 23)
301 					return (NULL);
302 			} else if (i == 0 || i > 12)
303 				return (NULL);
304 
305 			tm->tm_hour = i;
306 
307 			break;
308 
309 		case 'p':
310 			/*
311 			 * XXX This is bogus if parsed before hour-related
312 			 * specifiers.
313 			 */
314 			if (tm->tm_hour > 12)
315 				return (NULL);
316 
317 			len = strlen(tptr->am);
318 			if (strncasecmp_l(buf, tptr->am, len, locale) == 0) {
319 				if (tm->tm_hour == 12)
320 					tm->tm_hour = 0;
321 				buf += len;
322 				break;
323 			}
324 
325 			len = strlen(tptr->pm);
326 			if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) {
327 				if (tm->tm_hour != 12)
328 					tm->tm_hour += 12;
329 				buf += len;
330 				break;
331 			}
332 
333 			return (NULL);
334 
335 		case 'A':
336 		case 'a':
337 			for (i = 0; i < asizeof(tptr->weekday); i++) {
338 				len = strlen(tptr->weekday[i]);
339 				if (strncasecmp_l(buf, tptr->weekday[i],
340 						len, locale) == 0)
341 					break;
342 				len = strlen(tptr->wday[i]);
343 				if (strncasecmp_l(buf, tptr->wday[i],
344 						len, locale) == 0)
345 					break;
346 			}
347 			if (i == asizeof(tptr->weekday))
348 				return (NULL);
349 
350 			buf += len;
351 			tm->tm_wday = i;
352 			flags |= FLAG_WDAY;
353 			break;
354 
355 		case 'U':
356 		case 'W':
357 			/*
358 			 * XXX This is bogus, as we can not assume any valid
359 			 * information present in the tm structure at this
360 			 * point to calculate a real value, so just check the
361 			 * range for now.
362 			 */
363 			if (!isdigit_l((unsigned char)*buf, locale))
364 				return (NULL);
365 
366 			len = 2;
367 			for (i = 0; len && *buf != 0 &&
368 			     isdigit_l((unsigned char)*buf, locale); buf++) {
369 				i *= 10;
370 				i += *buf - '0';
371 				len--;
372 			}
373 			if (i > 53)
374 				return (NULL);
375 
376 			if (c == 'U')
377 				day_offset = TM_SUNDAY;
378 			else
379 				day_offset = TM_MONDAY;
380 
381 
382 			week_offset = i;
383 
384 			break;
385 
386 		case 'u':
387 		case 'w':
388 			if (!isdigit_l((unsigned char)*buf, locale))
389 				return (NULL);
390 
391 			i = *buf++ - '0';
392 			if (i < 0 || i > 7 || (c == 'u' && i < 1) ||
393 			    (c == 'w' && i > 6))
394 				return (NULL);
395 
396 			tm->tm_wday = i % 7;
397 			flags |= FLAG_WDAY;
398 
399 			break;
400 
401 		case 'e':
402 			/*
403 			 * With %e format, our strftime(3) adds a blank space
404 			 * before single digits.
405 			 */
406 			if (*buf != 0 &&
407 			    isspace_l((unsigned char)*buf, locale))
408 			       buf++;
409 			/* FALLTHROUGH */
410 		case 'd':
411 			/*
412 			 * The %e specifier was once explicitly documented as
413 			 * not being zero-padded but was later changed to
414 			 * equivalent to %d.  There is no harm in allowing
415 			 * such padding.
416 			 *
417 			 * XXX The %e specifier may gobble one too many
418 			 * digits if used incorrectly.
419 			 */
420 			if (!isdigit_l((unsigned char)*buf, locale))
421 				return (NULL);
422 
423 			len = 2;
424 			for (i = 0; len && *buf != 0 &&
425 			     isdigit_l((unsigned char)*buf, locale); buf++) {
426 				i *= 10;
427 				i += *buf - '0';
428 				len--;
429 			}
430 			if (i == 0 || i > 31)
431 				return (NULL);
432 
433 			tm->tm_mday = i;
434 			flags |= FLAG_MDAY;
435 
436 			break;
437 
438 		case 'B':
439 		case 'b':
440 		case 'h':
441 			for (i = 0; i < asizeof(tptr->month); i++) {
442 				if (Oalternative) {
443 					if (c == 'B') {
444 						len = strlen(tptr->alt_month[i]);
445 						if (strncasecmp_l(buf,
446 								tptr->alt_month[i],
447 								len, locale) == 0)
448 							break;
449 					}
450 				} else {
451 					len = strlen(tptr->month[i]);
452 					if (strncasecmp_l(buf, tptr->month[i],
453 							len, locale) == 0)
454 						break;
455 				}
456 			}
457 			/*
458 			 * Try the abbreviated month name if the full name
459 			 * wasn't found and Oalternative was not requested.
460 			 */
461 			if (i == asizeof(tptr->month) && !Oalternative) {
462 				for (i = 0; i < asizeof(tptr->month); i++) {
463 					len = strlen(tptr->mon[i]);
464 					if (strncasecmp_l(buf, tptr->mon[i],
465 							len, locale) == 0)
466 						break;
467 				}
468 			}
469 			if (i == asizeof(tptr->month))
470 				return (NULL);
471 
472 			tm->tm_mon = i;
473 			buf += len;
474 			flags |= FLAG_MONTH;
475 
476 			break;
477 
478 		case 'm':
479 			if (!isdigit_l((unsigned char)*buf, locale))
480 				return (NULL);
481 
482 			len = 2;
483 			for (i = 0; len && *buf != 0 &&
484 			     isdigit_l((unsigned char)*buf, locale); buf++) {
485 				i *= 10;
486 				i += *buf - '0';
487 				len--;
488 			}
489 			if (i < 1 || i > 12)
490 				return (NULL);
491 
492 			tm->tm_mon = i - 1;
493 			flags |= FLAG_MONTH;
494 
495 			break;
496 
497 		case 's':
498 			{
499 			char *cp;
500 			int sverrno;
501 			long n;
502 			time_t t;
503 
504 			sverrno = errno;
505 			errno = 0;
506 			n = strtol_l(buf, &cp, 10, locale);
507 			if (errno == ERANGE || (long)(t = n) != n) {
508 				errno = sverrno;
509 				return (NULL);
510 			}
511 			errno = sverrno;
512 			buf = cp;
513 			if (gmtime_r(&t, tm) == NULL)
514 				return (NULL);
515 			*GMTp = 1;
516 			flags |= FLAG_YDAY | FLAG_WDAY | FLAG_MONTH |
517 			    FLAG_MDAY | FLAG_YEAR;
518 			}
519 			break;
520 
521 		case 'Y':
522 		case 'y':
523 			if (*buf == 0 ||
524 			    isspace_l((unsigned char)*buf, locale))
525 				break;
526 
527 			if (!isdigit_l((unsigned char)*buf, locale))
528 				return (NULL);
529 
530 			len = (c == 'Y') ? 4 : 2;
531 			for (i = 0; len && *buf != 0 &&
532 			     isdigit_l((unsigned char)*buf, locale); buf++) {
533 				i *= 10;
534 				i += *buf - '0';
535 				len--;
536 			}
537 			if (c == 'Y')
538 				century = i / 100;
539 			year = i % 100;
540 
541 			flags |= FLAG_YEAR;
542 
543 			break;
544 
545 		case 'Z':
546 			{
547 			const char *cp;
548 			char *zonestr;
549 
550 			for (cp = buf; *cp &&
551 			     isupper_l((unsigned char)*cp, locale); ++cp) {
552 				/*empty*/}
553 			if (cp - buf) {
554 				zonestr = alloca(cp - buf + 1);
555 				strncpy(zonestr, buf, cp - buf);
556 				zonestr[cp - buf] = '\0';
557 				tzset();
558 				if (0 == strcmp(zonestr, "GMT") ||
559 				    0 == strcmp(zonestr, "UTC")) {
560 				    *GMTp = 1;
561 				} else if (0 == strcmp(zonestr, tzname[0])) {
562 				    tm->tm_isdst = 0;
563 				} else if (0 == strcmp(zonestr, tzname[1])) {
564 				    tm->tm_isdst = 1;
565 				} else {
566 				    return (NULL);
567 				}
568 				buf += cp - buf;
569 			}
570 			}
571 			break;
572 
573 		case 'z':
574 			{
575 			int sign = 1;
576 
577 			if (*buf != '+') {
578 				if (*buf == '-')
579 					sign = -1;
580 				else
581 					return (NULL);
582 			}
583 
584 			buf++;
585 			i = 0;
586 			for (len = 4; len > 0; len--) {
587 				if (isdigit_l((unsigned char)*buf, locale)) {
588 					i *= 10;
589 					i += *buf - '0';
590 					buf++;
591 				} else if (len == 2) {
592 					i *= 100;
593 					break;
594 				} else
595 					return (NULL);
596 			}
597 
598 			if (i > 1400 || (sign == -1 && i > 1200) ||
599 			    (i % 100) >= 60)
600 				return (NULL);
601 			tm->tm_hour -= sign * (i / 100);
602 			tm->tm_min  -= sign * (i % 100);
603 			*GMTp = 1;
604 			}
605 			break;
606 
607 		case 'n':
608 		case 't':
609 			while (isspace_l((unsigned char)*buf, locale))
610 				buf++;
611 			break;
612 
613 		default:
614 			return (NULL);
615 		}
616 	}
617 
618 	if (century != -1 || year != -1) {
619 		if (year == -1)
620 			year = 0;
621 		if (century == -1) {
622 			if (year < 69)
623 				year += 100;
624 		} else
625 			year += century * 100 - TM_YEAR_BASE;
626 		tm->tm_year = year;
627 	}
628 
629 	if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) {
630 		if ((flags & (FLAG_MONTH | FLAG_MDAY)) ==
631 		    (FLAG_MONTH | FLAG_MDAY)) {
632 			tm->tm_yday = start_of_month[isleap(tm->tm_year +
633 			    TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
634 			flags |= FLAG_YDAY;
635 		} else if (day_offset != -1) {
636 			int tmpwday, tmpyday, fwo;
637 
638 			fwo = first_wday_of(tm->tm_year + TM_YEAR_BASE);
639 			/* No incomplete week (week 0). */
640 			if (week_offset == 0 && fwo == day_offset)
641 				return (NULL);
642 
643 			/* Set the date to the first Sunday (or Monday)
644 			 * of the specified week of the year.
645 			 */
646 			tmpwday = (flags & FLAG_WDAY) ? tm->tm_wday :
647 			    day_offset;
648 			tmpyday = (7 - fwo + day_offset) % 7 +
649 			    (week_offset - 1) * 7 +
650 			    (tmpwday - day_offset + 7) % 7;
651 			/* Impossible yday for incomplete week (week 0). */
652 			if (tmpyday < 0) {
653 				if (flags & FLAG_WDAY)
654 					return (NULL);
655 				tmpyday = 0;
656 			}
657 			tm->tm_yday = tmpyday;
658 			flags |= FLAG_YDAY;
659 		}
660 	}
661 
662 	if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) {
663 		if (!(flags & FLAG_MONTH)) {
664 			i = 0;
665 			while (tm->tm_yday >=
666 			    start_of_month[isleap(tm->tm_year +
667 			    TM_YEAR_BASE)][i])
668 				i++;
669 			if (i > 12) {
670 				i = 1;
671 				tm->tm_yday -=
672 				    start_of_month[isleap(tm->tm_year +
673 				    TM_YEAR_BASE)][12];
674 				tm->tm_year++;
675 			}
676 			tm->tm_mon = i - 1;
677 			flags |= FLAG_MONTH;
678 		}
679 		if (!(flags & FLAG_MDAY)) {
680 			tm->tm_mday = tm->tm_yday -
681 			    start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)]
682 			    [tm->tm_mon] + 1;
683 			flags |= FLAG_MDAY;
684 		}
685 		if (!(flags & FLAG_WDAY)) {
686 			i = 0;
687 			wday_offset = first_wday_of(tm->tm_year);
688 			while (i++ <= tm->tm_yday) {
689 				if (wday_offset++ >= 6)
690 					wday_offset = 0;
691 			}
692 			tm->tm_wday = wday_offset;
693 			flags |= FLAG_WDAY;
694 		}
695 	}
696 
697 	return ((char *)buf);
698 }
699 
700 char *
701 strptime_l(const char * __restrict buf, const char * __restrict fmt,
702     struct tm * __restrict tm, locale_t loc)
703 {
704 	char *ret;
705 	int gmt;
706 	FIX_LOCALE(loc);
707 
708 	gmt = 0;
709 	ret = _strptime(buf, fmt, tm, &gmt, loc);
710 	if (ret && gmt) {
711 		time_t t = timegm(tm);
712 
713 		localtime_r(&t, tm);
714 	}
715 
716 	return (ret);
717 }
718 
719 char *
720 strptime(const char * __restrict buf, const char * __restrict fmt,
721     struct tm * __restrict tm)
722 {
723 	return strptime_l(buf, fmt, tm, __get_locale());
724 }
725