xref: /freebsd/lib/libc/stdtime/strptime.c (revision 22cf89c938886d14f5796fc49f9f020c23ea8eaf)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2014 Gary Mills
5  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
6  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  *
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer
20  *    in the documentation and/or other materials provided with the
21  *    distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
24  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
32  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * The views and conclusions contained in the software and documentation
36  * are those of the authors and should not be interpreted as representing
37  * official policies, either expressed or implied, of Powerdog Industries.
38  */
39 
40 #include <sys/cdefs.h>
41 #ifndef lint
42 #ifndef NOID
43 static char copyright[] __unused =
44 "@(#) Copyright (c) 1994 Powerdog Industries.  All rights reserved.";
45 static char sccsid[] __unused = "@(#)strptime.c	0.1 (Powerdog) 94/03/27";
46 #endif /* !defined NOID */
47 #endif /* not lint */
48 #include "namespace.h"
49 #include <time.h>
50 #include <ctype.h>
51 #include <errno.h>
52 #include <stdlib.h>
53 #include <string.h>
54 #include <pthread.h>
55 #include "private.h"
56 #include "un-namespace.h"
57 #include "libc_private.h"
58 #include "timelocal.h"
59 #include "tzfile.h"
60 
61 static char * _strptime(const char *, const char *, struct tm *, int *, locale_t);
62 
/* Element count of a true array; not meaningful when given a pointer. */
#define	asizeof(a)	(sizeof(a) / sizeof(*(a)))

/*
 * Bit values OR-ed into the `flags' word of _strptime() to record which
 * date fields have been obtained from the input so far.
 */
#define	FLAG_NONE	0x01
#define	FLAG_YEAR	0x02
#define	FLAG_MONTH	0x04
#define	FLAG_YDAY	0x08
#define	FLAG_MDAY	0x10
#define	FLAG_WDAY	0x20
71 
/*
 * Return the week day (0 = Sunday ... 6 = Saturday) of January 1st of
 * the given calendar year.  `year' is the full year number (e.g. 2024),
 * not an offset from TM_YEAR_BASE.
 *
 * Only valid for the Gregorian calendar, which began Sept 14, 1752 in
 * the UK and its colonies.  Ref:
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */

static int
first_wday_of(int year)
{
	int century_term, yy, leap_term;

	/* Contribution of the century, repeating every 400 years. */
	century_term = 2 * (3 - (year / 100) % 4);
	/* Two-digit year within the century. */
	yy = year % 100;
	/* Adjustment applied when the year itself is a leap year. */
	leap_term = isleap(year) ? 6 : 0;

	return ((century_term + yy + (yy / 4) + leap_term + 1) % 7);
}
85 
86 static char *
87 _strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp,
88 		locale_t locale)
89 {
90 	char	c;
91 	const char *ptr;
92 	int	day_offset = -1, wday_offset;
93 	int week_offset;
94 	int	i, len;
95 	int flags;
96 	int Ealternative, Oalternative;
97 	int century, year;
98 	const struct lc_time_T *tptr = __get_current_time_locale(locale);
99 	static int start_of_month[2][13] = {
100 		{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
101 		{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
102 	};
103 
104 	flags = FLAG_NONE;
105 	century = -1;
106 	year = -1;
107 
108 	ptr = fmt;
109 	while (*ptr != 0) {
110 		c = *ptr++;
111 
112 		if (c != '%') {
113 			if (isspace_l((unsigned char)c, locale))
114 				while (*buf != 0 &&
115 				       isspace_l((unsigned char)*buf, locale))
116 					buf++;
117 			else if (c != *buf++)
118 				return (NULL);
119 			continue;
120 		}
121 
122 		Ealternative = 0;
123 		Oalternative = 0;
124 label:
125 		c = *ptr++;
126 		switch (c) {
127 		case '%':
128 			if (*buf++ != '%')
129 				return (NULL);
130 			break;
131 
132 		case '+':
133 			buf = _strptime(buf, tptr->date_fmt, tm, GMTp, locale);
134 			if (buf == NULL)
135 				return (NULL);
136 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
137 			break;
138 
139 		case 'C':
140 			if (!isdigit_l((unsigned char)*buf, locale))
141 				return (NULL);
142 
143 			/* XXX This will break for 3-digit centuries. */
144 			len = 2;
145 			for (i = 0; len && *buf != 0 &&
146 			     isdigit_l((unsigned char)*buf, locale); buf++) {
147 				i *= 10;
148 				i += *buf - '0';
149 				len--;
150 			}
151 
152 			century = i;
153 			flags |= FLAG_YEAR;
154 
155 			break;
156 
157 		case 'c':
158 			buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale);
159 			if (buf == NULL)
160 				return (NULL);
161 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
162 			break;
163 
164 		case 'D':
165 			buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale);
166 			if (buf == NULL)
167 				return (NULL);
168 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
169 			break;
170 
171 		case 'E':
172 			if (Ealternative || Oalternative)
173 				break;
174 			Ealternative++;
175 			goto label;
176 
177 		case 'O':
178 			if (Ealternative || Oalternative)
179 				break;
180 			Oalternative++;
181 			goto label;
182 
183 		case 'F':
184 			buf = _strptime(buf, "%Y-%m-%d", tm, GMTp, locale);
185 			if (buf == NULL)
186 				return (NULL);
187 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
188 			break;
189 
190 		case 'R':
191 			buf = _strptime(buf, "%H:%M", tm, GMTp, locale);
192 			if (buf == NULL)
193 				return (NULL);
194 			break;
195 
196 		case 'r':
197 			buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale);
198 			if (buf == NULL)
199 				return (NULL);
200 			break;
201 
202 		case 'T':
203 			buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale);
204 			if (buf == NULL)
205 				return (NULL);
206 			break;
207 
208 		case 'X':
209 			buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale);
210 			if (buf == NULL)
211 				return (NULL);
212 			break;
213 
214 		case 'x':
215 			buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale);
216 			if (buf == NULL)
217 				return (NULL);
218 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
219 			break;
220 
221 		case 'j':
222 			if (!isdigit_l((unsigned char)*buf, locale))
223 				return (NULL);
224 
225 			len = 3;
226 			for (i = 0; len && *buf != 0 &&
227 			     isdigit_l((unsigned char)*buf, locale); buf++){
228 				i *= 10;
229 				i += *buf - '0';
230 				len--;
231 			}
232 			if (i < 1 || i > 366)
233 				return (NULL);
234 
235 			tm->tm_yday = i - 1;
236 			flags |= FLAG_YDAY;
237 
238 			break;
239 
240 		case 'M':
241 		case 'S':
242 			if (*buf == 0 ||
243 				isspace_l((unsigned char)*buf, locale))
244 				break;
245 
246 			if (!isdigit_l((unsigned char)*buf, locale))
247 				return (NULL);
248 
249 			len = 2;
250 			for (i = 0; len && *buf != 0 &&
251 				isdigit_l((unsigned char)*buf, locale); buf++){
252 				i *= 10;
253 				i += *buf - '0';
254 				len--;
255 			}
256 
257 			if (c == 'M') {
258 				if (i > 59)
259 					return (NULL);
260 				tm->tm_min = i;
261 			} else {
262 				if (i > 60)
263 					return (NULL);
264 				tm->tm_sec = i;
265 			}
266 
267 			break;
268 
269 		case 'H':
270 		case 'I':
271 		case 'k':
272 		case 'l':
273 			/*
274 			 * %k and %l specifiers are documented as being
275 			 * blank-padded.  However, there is no harm in
276 			 * allowing zero-padding.
277 			 *
278 			 * XXX %k and %l specifiers may gobble one too many
279 			 * digits if used incorrectly.
280 			 */
281 
282 			len = 2;
283 			if ((c == 'k' || c == 'l') &&
284 			    isblank_l((unsigned char)*buf, locale)) {
285 				buf++;
286 				len = 1;
287 			}
288 
289 			if (!isdigit_l((unsigned char)*buf, locale))
290 				return (NULL);
291 
292 			for (i = 0; len && *buf != 0 &&
293 			     isdigit_l((unsigned char)*buf, locale); buf++) {
294 				i *= 10;
295 				i += *buf - '0';
296 				len--;
297 			}
298 			if (c == 'H' || c == 'k') {
299 				if (i > 23)
300 					return (NULL);
301 			} else if (i == 0 || i > 12)
302 				return (NULL);
303 
304 			tm->tm_hour = i;
305 
306 			break;
307 
308 		case 'p':
309 			/*
310 			 * XXX This is bogus if parsed before hour-related
311 			 * specifiers.
312 			 */
313 			if (tm->tm_hour > 12)
314 				return (NULL);
315 
316 			len = strlen(tptr->am);
317 			if (strncasecmp_l(buf, tptr->am, len, locale) == 0) {
318 				if (tm->tm_hour == 12)
319 					tm->tm_hour = 0;
320 				buf += len;
321 				break;
322 			}
323 
324 			len = strlen(tptr->pm);
325 			if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) {
326 				if (tm->tm_hour != 12)
327 					tm->tm_hour += 12;
328 				buf += len;
329 				break;
330 			}
331 
332 			return (NULL);
333 
334 		case 'A':
335 		case 'a':
336 			for (i = 0; i < asizeof(tptr->weekday); i++) {
337 				len = strlen(tptr->weekday[i]);
338 				if (strncasecmp_l(buf, tptr->weekday[i],
339 						len, locale) == 0)
340 					break;
341 				len = strlen(tptr->wday[i]);
342 				if (strncasecmp_l(buf, tptr->wday[i],
343 						len, locale) == 0)
344 					break;
345 			}
346 			if (i == asizeof(tptr->weekday))
347 				return (NULL);
348 
349 			buf += len;
350 			tm->tm_wday = i;
351 			flags |= FLAG_WDAY;
352 			break;
353 
354 		case 'U':
355 		case 'W':
356 			/*
357 			 * XXX This is bogus, as we can not assume any valid
358 			 * information present in the tm structure at this
359 			 * point to calculate a real value, so just check the
360 			 * range for now.
361 			 */
362 			if (!isdigit_l((unsigned char)*buf, locale))
363 				return (NULL);
364 
365 			len = 2;
366 			for (i = 0; len && *buf != 0 &&
367 			     isdigit_l((unsigned char)*buf, locale); buf++) {
368 				i *= 10;
369 				i += *buf - '0';
370 				len--;
371 			}
372 			if (i > 53)
373 				return (NULL);
374 
375 			if (c == 'U')
376 				day_offset = TM_SUNDAY;
377 			else
378 				day_offset = TM_MONDAY;
379 
380 
381 			week_offset = i;
382 
383 			break;
384 
385 		case 'u':
386 		case 'w':
387 			if (!isdigit_l((unsigned char)*buf, locale))
388 				return (NULL);
389 
390 			i = *buf++ - '0';
391 			if (i < 0 || i > 7 || (c == 'u' && i < 1) ||
392 			    (c == 'w' && i > 6))
393 				return (NULL);
394 
395 			tm->tm_wday = i % 7;
396 			flags |= FLAG_WDAY;
397 
398 			break;
399 
400 		case 'e':
401 			/*
402 			 * With %e format, our strftime(3) adds a blank space
403 			 * before single digits.
404 			 */
405 			if (*buf != 0 &&
406 			    isspace_l((unsigned char)*buf, locale))
407 			       buf++;
408 			/* FALLTHROUGH */
409 		case 'd':
410 			/*
411 			 * The %e specifier was once explicitly documented as
412 			 * not being zero-padded but was later changed to
413 			 * equivalent to %d.  There is no harm in allowing
414 			 * such padding.
415 			 *
416 			 * XXX The %e specifier may gobble one too many
417 			 * digits if used incorrectly.
418 			 */
419 			if (!isdigit_l((unsigned char)*buf, locale))
420 				return (NULL);
421 
422 			len = 2;
423 			for (i = 0; len && *buf != 0 &&
424 			     isdigit_l((unsigned char)*buf, locale); buf++) {
425 				i *= 10;
426 				i += *buf - '0';
427 				len--;
428 			}
429 			if (i == 0 || i > 31)
430 				return (NULL);
431 
432 			tm->tm_mday = i;
433 			flags |= FLAG_MDAY;
434 
435 			break;
436 
437 		case 'B':
438 		case 'b':
439 		case 'h':
440 			for (i = 0; i < asizeof(tptr->month); i++) {
441 				if (Oalternative) {
442 					if (c == 'B') {
443 						len = strlen(tptr->alt_month[i]);
444 						if (strncasecmp_l(buf,
445 								tptr->alt_month[i],
446 								len, locale) == 0)
447 							break;
448 					}
449 				} else {
450 					len = strlen(tptr->month[i]);
451 					if (strncasecmp_l(buf, tptr->month[i],
452 							len, locale) == 0)
453 						break;
454 				}
455 			}
456 			/*
457 			 * Try the abbreviated month name if the full name
458 			 * wasn't found and Oalternative was not requested.
459 			 */
460 			if (i == asizeof(tptr->month) && !Oalternative) {
461 				for (i = 0; i < asizeof(tptr->month); i++) {
462 					len = strlen(tptr->mon[i]);
463 					if (strncasecmp_l(buf, tptr->mon[i],
464 							len, locale) == 0)
465 						break;
466 				}
467 			}
468 			if (i == asizeof(tptr->month))
469 				return (NULL);
470 
471 			tm->tm_mon = i;
472 			buf += len;
473 			flags |= FLAG_MONTH;
474 
475 			break;
476 
477 		case 'm':
478 			if (!isdigit_l((unsigned char)*buf, locale))
479 				return (NULL);
480 
481 			len = 2;
482 			for (i = 0; len && *buf != 0 &&
483 			     isdigit_l((unsigned char)*buf, locale); buf++) {
484 				i *= 10;
485 				i += *buf - '0';
486 				len--;
487 			}
488 			if (i < 1 || i > 12)
489 				return (NULL);
490 
491 			tm->tm_mon = i - 1;
492 			flags |= FLAG_MONTH;
493 
494 			break;
495 
496 		case 's':
497 			{
498 			char *cp;
499 			int sverrno;
500 			long n;
501 			time_t t;
502 
503 			sverrno = errno;
504 			errno = 0;
505 			n = strtol_l(buf, &cp, 10, locale);
506 			if (errno == ERANGE || (long)(t = n) != n) {
507 				errno = sverrno;
508 				return (NULL);
509 			}
510 			errno = sverrno;
511 			buf = cp;
512 			if (gmtime_r(&t, tm) == NULL)
513 				return (NULL);
514 			*GMTp = 1;
515 			flags |= FLAG_YDAY | FLAG_WDAY | FLAG_MONTH |
516 			    FLAG_MDAY | FLAG_YEAR;
517 			}
518 			break;
519 
520 		case 'Y':
521 		case 'y':
522 			if (*buf == 0 ||
523 			    isspace_l((unsigned char)*buf, locale))
524 				break;
525 
526 			if (!isdigit_l((unsigned char)*buf, locale))
527 				return (NULL);
528 
529 			len = (c == 'Y') ? 4 : 2;
530 			for (i = 0; len && *buf != 0 &&
531 			     isdigit_l((unsigned char)*buf, locale); buf++) {
532 				i *= 10;
533 				i += *buf - '0';
534 				len--;
535 			}
536 			if (c == 'Y')
537 				century = i / 100;
538 			year = i % 100;
539 
540 			flags |= FLAG_YEAR;
541 
542 			break;
543 
544 		case 'Z':
545 			{
546 			const char *cp;
547 			char *zonestr;
548 
549 			for (cp = buf; *cp &&
550 			     isupper_l((unsigned char)*cp, locale); ++cp) {
551 				/*empty*/}
552 			if (cp - buf) {
553 				zonestr = alloca(cp - buf + 1);
554 				strncpy(zonestr, buf, cp - buf);
555 				zonestr[cp - buf] = '\0';
556 				tzset();
557 				if (0 == strcmp(zonestr, "GMT") ||
558 				    0 == strcmp(zonestr, "UTC")) {
559 				    *GMTp = 1;
560 				} else if (0 == strcmp(zonestr, tzname[0])) {
561 				    tm->tm_isdst = 0;
562 				} else if (0 == strcmp(zonestr, tzname[1])) {
563 				    tm->tm_isdst = 1;
564 				} else {
565 				    return (NULL);
566 				}
567 				buf += cp - buf;
568 			}
569 			}
570 			break;
571 
572 		case 'z':
573 			{
574 			int sign = 1;
575 
576 			if (*buf != '+') {
577 				if (*buf == '-')
578 					sign = -1;
579 				else
580 					return (NULL);
581 			}
582 
583 			buf++;
584 			i = 0;
585 			for (len = 4; len > 0; len--) {
586 				if (isdigit_l((unsigned char)*buf, locale)) {
587 					i *= 10;
588 					i += *buf - '0';
589 					buf++;
590 				} else if (len == 2) {
591 					i *= 100;
592 					break;
593 				} else
594 					return (NULL);
595 			}
596 
597 			if (i > 1400 || (sign == -1 && i > 1200) ||
598 			    (i % 100) >= 60)
599 				return (NULL);
600 			tm->tm_hour -= sign * (i / 100);
601 			tm->tm_min  -= sign * (i % 100);
602 			*GMTp = 1;
603 			}
604 			break;
605 
606 		case 'n':
607 		case 't':
608 			while (isspace_l((unsigned char)*buf, locale))
609 				buf++;
610 			break;
611 
612 		default:
613 			return (NULL);
614 		}
615 	}
616 
617 	if (century != -1 || year != -1) {
618 		if (year == -1)
619 			year = 0;
620 		if (century == -1) {
621 			if (year < 69)
622 				year += 100;
623 		} else
624 			year += century * 100 - TM_YEAR_BASE;
625 		tm->tm_year = year;
626 	}
627 
628 	if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) {
629 		if ((flags & (FLAG_MONTH | FLAG_MDAY)) ==
630 		    (FLAG_MONTH | FLAG_MDAY)) {
631 			tm->tm_yday = start_of_month[isleap(tm->tm_year +
632 			    TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
633 			flags |= FLAG_YDAY;
634 		} else if (day_offset != -1) {
635 			int tmpwday, tmpyday, fwo;
636 
637 			fwo = first_wday_of(tm->tm_year + TM_YEAR_BASE);
638 			/* No incomplete week (week 0). */
639 			if (week_offset == 0 && fwo == day_offset)
640 				return (NULL);
641 
642 			/* Set the date to the first Sunday (or Monday)
643 			 * of the specified week of the year.
644 			 */
645 			tmpwday = (flags & FLAG_WDAY) ? tm->tm_wday :
646 			    day_offset;
647 			tmpyday = (7 - fwo + day_offset) % 7 +
648 			    (week_offset - 1) * 7 +
649 			    (tmpwday - day_offset + 7) % 7;
650 			/* Impossible yday for incomplete week (week 0). */
651 			if (tmpyday < 0) {
652 				if (flags & FLAG_WDAY)
653 					return (NULL);
654 				tmpyday = 0;
655 			}
656 			tm->tm_yday = tmpyday;
657 			flags |= FLAG_YDAY;
658 		}
659 	}
660 
661 	if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) {
662 		if (!(flags & FLAG_MONTH)) {
663 			i = 0;
664 			while (tm->tm_yday >=
665 			    start_of_month[isleap(tm->tm_year +
666 			    TM_YEAR_BASE)][i])
667 				i++;
668 			if (i > 12) {
669 				i = 1;
670 				tm->tm_yday -=
671 				    start_of_month[isleap(tm->tm_year +
672 				    TM_YEAR_BASE)][12];
673 				tm->tm_year++;
674 			}
675 			tm->tm_mon = i - 1;
676 			flags |= FLAG_MONTH;
677 		}
678 		if (!(flags & FLAG_MDAY)) {
679 			tm->tm_mday = tm->tm_yday -
680 			    start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)]
681 			    [tm->tm_mon] + 1;
682 			flags |= FLAG_MDAY;
683 		}
684 		if (!(flags & FLAG_WDAY)) {
685 			i = 0;
686 			wday_offset = first_wday_of(tm->tm_year);
687 			while (i++ <= tm->tm_yday) {
688 				if (wday_offset++ >= 6)
689 					wday_offset = 0;
690 			}
691 			tm->tm_wday = wday_offset;
692 			flags |= FLAG_WDAY;
693 		}
694 	}
695 
696 	return ((char *)buf);
697 }
698 
699 char *
700 strptime_l(const char * __restrict buf, const char * __restrict fmt,
701     struct tm * __restrict tm, locale_t loc)
702 {
703 	char *ret;
704 	int gmt;
705 	FIX_LOCALE(loc);
706 
707 	gmt = 0;
708 	ret = _strptime(buf, fmt, tm, &gmt, loc);
709 	if (ret && gmt) {
710 		time_t t = timegm(tm);
711 
712 		localtime_r(&t, tm);
713 	}
714 
715 	return (ret);
716 }
717 
718 char *
719 strptime(const char * __restrict buf, const char * __restrict fmt,
720     struct tm * __restrict tm)
721 {
722 	return strptime_l(buf, fmt, tm, __get_locale());
723 }
724