xref: /freebsd/lib/libc/stdtime/strptime.c (revision 4f52dfbb8d6c4d446500c5b097e3806ec219fbd4)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2014 Gary Mills
5  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
6  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
7  *
8  * Copyright (c) 2011 The FreeBSD Foundation
9  * All rights reserved.
10  * Portions of this software were developed by David Chisnall
11  * under sponsorship from the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer
20  *    in the documentation and/or other materials provided with the
21  *    distribution.
22  *
23  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
24  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
26  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
27  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
30  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
31  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
32  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
33  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34  *
35  * The views and conclusions contained in the software and documentation
36  * are those of the authors and should not be interpreted as representing
37  * official policies, either expressed or implied, of Powerdog Industries.
38  */
39 
40 #include <sys/cdefs.h>
41 #ifndef lint
42 #ifndef NOID
43 static char copyright[] __unused =
44 "@(#) Copyright (c) 1994 Powerdog Industries.  All rights reserved.";
45 static char sccsid[] __unused = "@(#)strptime.c	0.1 (Powerdog) 94/03/27";
46 #endif /* !defined NOID */
47 #endif /* not lint */
48 __FBSDID("$FreeBSD$");
49 
50 #include "namespace.h"
51 #include <time.h>
52 #include <ctype.h>
53 #include <errno.h>
54 #include <stdlib.h>
55 #include <string.h>
56 #include <pthread.h>
57 #include "un-namespace.h"
58 #include "libc_private.h"
59 #include "timelocal.h"
60 #include "tzfile.h"
61 
62 static char * _strptime(const char *, const char *, struct tm *, int *, locale_t);
63 
/* Number of elements in a true array (not valid for pointers). */
#define	asizeof(a)	(sizeof(a) / sizeof(*(a)))

/*
 * Bit flags recording which date fields have been filled in while
 * parsing; one distinct bit per field so they can be OR-ed together.
 */
#define	FLAG_NONE	0x01	/* initial value, no date field seen yet */
#define	FLAG_YEAR	0x02	/* tm_year */
#define	FLAG_MONTH	0x04	/* tm_mon */
#define	FLAG_YDAY	0x08	/* tm_yday */
#define	FLAG_MDAY	0x10	/* tm_mday */
#define	FLAG_WDAY	0x20	/* tm_wday */
72 
/*
 * Return the day of the week on which January 1st of the given year
 * falls, counted from Sunday (0 = Sunday, 6 = Saturday).
 *
 * The argument is the full calendar year (e.g. 2001), not a tm_year
 * offset.  Valid for the Gregorian calendar, which began Sept 14, 1752
 * in the UK and its colonies. Ref:
 * http://en.wikipedia.org/wiki/Determination_of_the_day_of_the_week
 */

static int
first_wday_of(int year)
{
	int century_term, leap_term, yy;

	/* Two-digit year within its century. */
	yy = year % 100;
	/* Century contribution; repeats every four centuries. */
	century_term = 2 * (3 - (year / 100) % 4);
	/* January of a leap year sits one weekday earlier (-1 == 6 mod 7). */
	leap_term = isleap(year) ? 6 : 0;

	return ((century_term + yy + (yy / 4) + leap_term + 1) % 7);
}
86 
87 static char *
88 _strptime(const char *buf, const char *fmt, struct tm *tm, int *GMTp,
89 		locale_t locale)
90 {
91 	char	c;
92 	const char *ptr;
93 	int	day_offset = -1, wday_offset;
94 	int week_offset;
95 	int	i, len;
96 	int flags;
97 	int Ealternative, Oalternative;
98 	const struct lc_time_T *tptr = __get_current_time_locale(locale);
99 	static int start_of_month[2][13] = {
100 		{0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365},
101 		{0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366}
102 	};
103 
104 	flags = FLAG_NONE;
105 
106 	ptr = fmt;
107 	while (*ptr != 0) {
108 		c = *ptr++;
109 
110 		if (c != '%') {
111 			if (isspace_l((unsigned char)c, locale))
112 				while (*buf != 0 &&
113 				       isspace_l((unsigned char)*buf, locale))
114 					buf++;
115 			else if (c != *buf++)
116 				return (NULL);
117 			continue;
118 		}
119 
120 		Ealternative = 0;
121 		Oalternative = 0;
122 label:
123 		c = *ptr++;
124 		switch (c) {
125 		case '%':
126 			if (*buf++ != '%')
127 				return (NULL);
128 			break;
129 
130 		case '+':
131 			buf = _strptime(buf, tptr->date_fmt, tm, GMTp, locale);
132 			if (buf == NULL)
133 				return (NULL);
134 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
135 			break;
136 
137 		case 'C':
138 			if (!isdigit_l((unsigned char)*buf, locale))
139 				return (NULL);
140 
141 			/* XXX This will break for 3-digit centuries. */
142 			len = 2;
143 			for (i = 0; len && *buf != 0 &&
144 			     isdigit_l((unsigned char)*buf, locale); buf++) {
145 				i *= 10;
146 				i += *buf - '0';
147 				len--;
148 			}
149 			if (i < 19)
150 				return (NULL);
151 
152 			tm->tm_year = i * 100 - TM_YEAR_BASE;
153 			flags |= FLAG_YEAR;
154 
155 			break;
156 
157 		case 'c':
158 			buf = _strptime(buf, tptr->c_fmt, tm, GMTp, locale);
159 			if (buf == NULL)
160 				return (NULL);
161 			flags |= FLAG_WDAY | FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
162 			break;
163 
164 		case 'D':
165 			buf = _strptime(buf, "%m/%d/%y", tm, GMTp, locale);
166 			if (buf == NULL)
167 				return (NULL);
168 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
169 			break;
170 
171 		case 'E':
172 			if (Ealternative || Oalternative)
173 				break;
174 			Ealternative++;
175 			goto label;
176 
177 		case 'O':
178 			if (Ealternative || Oalternative)
179 				break;
180 			Oalternative++;
181 			goto label;
182 
183 		case 'F':
184 			buf = _strptime(buf, "%Y-%m-%d", tm, GMTp, locale);
185 			if (buf == NULL)
186 				return (NULL);
187 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
188 			break;
189 
190 		case 'R':
191 			buf = _strptime(buf, "%H:%M", tm, GMTp, locale);
192 			if (buf == NULL)
193 				return (NULL);
194 			break;
195 
196 		case 'r':
197 			buf = _strptime(buf, tptr->ampm_fmt, tm, GMTp, locale);
198 			if (buf == NULL)
199 				return (NULL);
200 			break;
201 
202 		case 'T':
203 			buf = _strptime(buf, "%H:%M:%S", tm, GMTp, locale);
204 			if (buf == NULL)
205 				return (NULL);
206 			break;
207 
208 		case 'X':
209 			buf = _strptime(buf, tptr->X_fmt, tm, GMTp, locale);
210 			if (buf == NULL)
211 				return (NULL);
212 			break;
213 
214 		case 'x':
215 			buf = _strptime(buf, tptr->x_fmt, tm, GMTp, locale);
216 			if (buf == NULL)
217 				return (NULL);
218 			flags |= FLAG_MONTH | FLAG_MDAY | FLAG_YEAR;
219 			break;
220 
221 		case 'j':
222 			if (!isdigit_l((unsigned char)*buf, locale))
223 				return (NULL);
224 
225 			len = 3;
226 			for (i = 0; len && *buf != 0 &&
227 			     isdigit_l((unsigned char)*buf, locale); buf++){
228 				i *= 10;
229 				i += *buf - '0';
230 				len--;
231 			}
232 			if (i < 1 || i > 366)
233 				return (NULL);
234 
235 			tm->tm_yday = i - 1;
236 			flags |= FLAG_YDAY;
237 
238 			break;
239 
240 		case 'M':
241 		case 'S':
242 			if (*buf == 0 ||
243 				isspace_l((unsigned char)*buf, locale))
244 				break;
245 
246 			if (!isdigit_l((unsigned char)*buf, locale))
247 				return (NULL);
248 
249 			len = 2;
250 			for (i = 0; len && *buf != 0 &&
251 				isdigit_l((unsigned char)*buf, locale); buf++){
252 				i *= 10;
253 				i += *buf - '0';
254 				len--;
255 			}
256 
257 			if (c == 'M') {
258 				if (i > 59)
259 					return (NULL);
260 				tm->tm_min = i;
261 			} else {
262 				if (i > 60)
263 					return (NULL);
264 				tm->tm_sec = i;
265 			}
266 
267 			break;
268 
269 		case 'H':
270 		case 'I':
271 		case 'k':
272 		case 'l':
273 			/*
274 			 * Of these, %l is the only specifier explicitly
275 			 * documented as not being zero-padded.  However,
276 			 * there is no harm in allowing zero-padding.
277 			 *
278 			 * XXX The %l specifier may gobble one too many
279 			 * digits if used incorrectly.
280 			 */
281 			if (!isdigit_l((unsigned char)*buf, locale))
282 				return (NULL);
283 
284 			len = 2;
285 			for (i = 0; len && *buf != 0 &&
286 			     isdigit_l((unsigned char)*buf, locale); buf++) {
287 				i *= 10;
288 				i += *buf - '0';
289 				len--;
290 			}
291 			if (c == 'H' || c == 'k') {
292 				if (i > 23)
293 					return (NULL);
294 			} else if (i > 12)
295 				return (NULL);
296 
297 			tm->tm_hour = i;
298 
299 			break;
300 
301 		case 'p':
302 			/*
303 			 * XXX This is bogus if parsed before hour-related
304 			 * specifiers.
305 			 */
306 			if (tm->tm_hour > 12)
307 				return (NULL);
308 
309 			len = strlen(tptr->am);
310 			if (strncasecmp_l(buf, tptr->am, len, locale) == 0) {
311 				if (tm->tm_hour == 12)
312 					tm->tm_hour = 0;
313 				buf += len;
314 				break;
315 			}
316 
317 			len = strlen(tptr->pm);
318 			if (strncasecmp_l(buf, tptr->pm, len, locale) == 0) {
319 				if (tm->tm_hour != 12)
320 					tm->tm_hour += 12;
321 				buf += len;
322 				break;
323 			}
324 
325 			return (NULL);
326 
327 		case 'A':
328 		case 'a':
329 			for (i = 0; i < asizeof(tptr->weekday); i++) {
330 				len = strlen(tptr->weekday[i]);
331 				if (strncasecmp_l(buf, tptr->weekday[i],
332 						len, locale) == 0)
333 					break;
334 				len = strlen(tptr->wday[i]);
335 				if (strncasecmp_l(buf, tptr->wday[i],
336 						len, locale) == 0)
337 					break;
338 			}
339 			if (i == asizeof(tptr->weekday))
340 				return (NULL);
341 
342 			buf += len;
343 			tm->tm_wday = i;
344 			flags |= FLAG_WDAY;
345 			break;
346 
347 		case 'U':
348 		case 'W':
349 			/*
350 			 * XXX This is bogus, as we can not assume any valid
351 			 * information present in the tm structure at this
352 			 * point to calculate a real value, so just check the
353 			 * range for now.
354 			 */
355 			if (!isdigit_l((unsigned char)*buf, locale))
356 				return (NULL);
357 
358 			len = 2;
359 			for (i = 0; len && *buf != 0 &&
360 			     isdigit_l((unsigned char)*buf, locale); buf++) {
361 				i *= 10;
362 				i += *buf - '0';
363 				len--;
364 			}
365 			if (i > 53)
366 				return (NULL);
367 
368 			if (c == 'U')
369 				day_offset = TM_SUNDAY;
370 			else
371 				day_offset = TM_MONDAY;
372 
373 
374 			week_offset = i;
375 
376 			break;
377 
378 		case 'u':
379 		case 'w':
380 			if (!isdigit_l((unsigned char)*buf, locale))
381 				return (NULL);
382 
383 			i = *buf++ - '0';
384 			if (i < 0 || i > 7 || (c == 'u' && i < 1) ||
385 			    (c == 'w' && i > 6))
386 				return (NULL);
387 
388 			tm->tm_wday = i % 7;
389 			flags |= FLAG_WDAY;
390 
391 			break;
392 
393 		case 'e':
394 			/*
395 			 * With %e format, our strftime(3) adds a blank space
396 			 * before single digits.
397 			 */
398 			if (*buf != 0 &&
399 			    isspace_l((unsigned char)*buf, locale))
400 			       buf++;
401 			/* FALLTHROUGH */
402 		case 'd':
403 			/*
404 			 * The %e specifier was once explicitly documented as
405 			 * not being zero-padded but was later changed to
406 			 * equivalent to %d.  There is no harm in allowing
407 			 * such padding.
408 			 *
409 			 * XXX The %e specifier may gobble one too many
410 			 * digits if used incorrectly.
411 			 */
412 			if (!isdigit_l((unsigned char)*buf, locale))
413 				return (NULL);
414 
415 			len = 2;
416 			for (i = 0; len && *buf != 0 &&
417 			     isdigit_l((unsigned char)*buf, locale); buf++) {
418 				i *= 10;
419 				i += *buf - '0';
420 				len--;
421 			}
422 			if (i > 31)
423 				return (NULL);
424 
425 			tm->tm_mday = i;
426 			flags |= FLAG_MDAY;
427 
428 			break;
429 
430 		case 'B':
431 		case 'b':
432 		case 'h':
433 			for (i = 0; i < asizeof(tptr->month); i++) {
434 				if (Oalternative) {
435 					if (c == 'B') {
436 						len = strlen(tptr->alt_month[i]);
437 						if (strncasecmp_l(buf,
438 								tptr->alt_month[i],
439 								len, locale) == 0)
440 							break;
441 					}
442 				} else {
443 					len = strlen(tptr->month[i]);
444 					if (strncasecmp_l(buf, tptr->month[i],
445 							len, locale) == 0)
446 						break;
447 				}
448 			}
449 			/*
450 			 * Try the abbreviated month name if the full name
451 			 * wasn't found and Oalternative was not requested.
452 			 */
453 			if (i == asizeof(tptr->month) && !Oalternative) {
454 				for (i = 0; i < asizeof(tptr->month); i++) {
455 					len = strlen(tptr->mon[i]);
456 					if (strncasecmp_l(buf, tptr->mon[i],
457 							len, locale) == 0)
458 						break;
459 				}
460 			}
461 			if (i == asizeof(tptr->month))
462 				return (NULL);
463 
464 			tm->tm_mon = i;
465 			buf += len;
466 			flags |= FLAG_MONTH;
467 
468 			break;
469 
470 		case 'm':
471 			if (!isdigit_l((unsigned char)*buf, locale))
472 				return (NULL);
473 
474 			len = 2;
475 			for (i = 0; len && *buf != 0 &&
476 			     isdigit_l((unsigned char)*buf, locale); buf++) {
477 				i *= 10;
478 				i += *buf - '0';
479 				len--;
480 			}
481 			if (i < 1 || i > 12)
482 				return (NULL);
483 
484 			tm->tm_mon = i - 1;
485 			flags |= FLAG_MONTH;
486 
487 			break;
488 
489 		case 's':
490 			{
491 			char *cp;
492 			int sverrno;
493 			long n;
494 			time_t t;
495 
496 			sverrno = errno;
497 			errno = 0;
498 			n = strtol_l(buf, &cp, 10, locale);
499 			if (errno == ERANGE || (long)(t = n) != n) {
500 				errno = sverrno;
501 				return (NULL);
502 			}
503 			errno = sverrno;
504 			buf = cp;
505 			if (gmtime_r(&t, tm) == NULL)
506 				return (NULL);
507 			*GMTp = 1;
508 			flags |= FLAG_YDAY | FLAG_WDAY | FLAG_MONTH |
509 			    FLAG_MDAY | FLAG_YEAR;
510 			}
511 			break;
512 
513 		case 'Y':
514 		case 'y':
515 			if (*buf == 0 ||
516 			    isspace_l((unsigned char)*buf, locale))
517 				break;
518 
519 			if (!isdigit_l((unsigned char)*buf, locale))
520 				return (NULL);
521 
522 			len = (c == 'Y') ? 4 : 2;
523 			for (i = 0; len && *buf != 0 &&
524 			     isdigit_l((unsigned char)*buf, locale); buf++) {
525 				i *= 10;
526 				i += *buf - '0';
527 				len--;
528 			}
529 			if (c == 'Y')
530 				i -= TM_YEAR_BASE;
531 			if (c == 'y' && i < 69)
532 				i += 100;
533 			if (i < 0)
534 				return (NULL);
535 
536 			tm->tm_year = i;
537 			flags |= FLAG_YEAR;
538 
539 			break;
540 
541 		case 'Z':
542 			{
543 			const char *cp;
544 			char *zonestr;
545 
546 			for (cp = buf; *cp &&
547 			     isupper_l((unsigned char)*cp, locale); ++cp) {
548 				/*empty*/}
549 			if (cp - buf) {
550 				zonestr = alloca(cp - buf + 1);
551 				strncpy(zonestr, buf, cp - buf);
552 				zonestr[cp - buf] = '\0';
553 				tzset();
554 				if (0 == strcmp(zonestr, "GMT") ||
555 				    0 == strcmp(zonestr, "UTC")) {
556 				    *GMTp = 1;
557 				} else if (0 == strcmp(zonestr, tzname[0])) {
558 				    tm->tm_isdst = 0;
559 				} else if (0 == strcmp(zonestr, tzname[1])) {
560 				    tm->tm_isdst = 1;
561 				} else {
562 				    return (NULL);
563 				}
564 				buf += cp - buf;
565 			}
566 			}
567 			break;
568 
569 		case 'z':
570 			{
571 			int sign = 1;
572 
573 			if (*buf != '+') {
574 				if (*buf == '-')
575 					sign = -1;
576 				else
577 					return (NULL);
578 			}
579 
580 			buf++;
581 			i = 0;
582 			for (len = 4; len > 0; len--) {
583 				if (isdigit_l((unsigned char)*buf, locale)) {
584 					i *= 10;
585 					i += *buf - '0';
586 					buf++;
587 				} else if (len == 2) {
588 					i *= 100;
589 					break;
590 				} else
591 					return (NULL);
592 			}
593 
594 			if (i > 1400 || (sign == -1 && i > 1200) ||
595 			    (i % 100) >= 60)
596 				return (NULL);
597 			tm->tm_hour -= sign * (i / 100);
598 			tm->tm_min  -= sign * (i % 100);
599 			*GMTp = 1;
600 			}
601 			break;
602 
603 		case 'n':
604 		case 't':
605 			while (isspace_l((unsigned char)*buf, locale))
606 				buf++;
607 			break;
608 
609 		default:
610 			return (NULL);
611 		}
612 	}
613 
614 	if (!(flags & FLAG_YDAY) && (flags & FLAG_YEAR)) {
615 		if ((flags & (FLAG_MONTH | FLAG_MDAY)) ==
616 		    (FLAG_MONTH | FLAG_MDAY)) {
617 			tm->tm_yday = start_of_month[isleap(tm->tm_year +
618 			    TM_YEAR_BASE)][tm->tm_mon] + (tm->tm_mday - 1);
619 			flags |= FLAG_YDAY;
620 		} else if (day_offset != -1) {
621 			int tmpwday, tmpyday, fwo;
622 
623 			fwo = first_wday_of(tm->tm_year + TM_YEAR_BASE);
624 			/* No incomplete week (week 0). */
625 			if (week_offset == 0 && fwo == day_offset)
626 				return (NULL);
627 
628 			/* Set the date to the first Sunday (or Monday)
629 			 * of the specified week of the year.
630 			 */
631 			tmpwday = (flags & FLAG_WDAY) ? tm->tm_wday :
632 			    day_offset;
633 			tmpyday = (7 - fwo + day_offset) % 7 +
634 			    (week_offset - 1) * 7 +
635 			    (tmpwday - day_offset + 7) % 7;
636 			/* Impossible yday for incomplete week (week 0). */
637 			if (tmpyday < 0) {
638 				if (flags & FLAG_WDAY)
639 					return (NULL);
640 				tmpyday = 0;
641 			}
642 			tm->tm_yday = tmpyday;
643 			flags |= FLAG_YDAY;
644 		}
645 	}
646 
647 	if ((flags & (FLAG_YEAR | FLAG_YDAY)) == (FLAG_YEAR | FLAG_YDAY)) {
648 		if (!(flags & FLAG_MONTH)) {
649 			i = 0;
650 			while (tm->tm_yday >=
651 			    start_of_month[isleap(tm->tm_year +
652 			    TM_YEAR_BASE)][i])
653 				i++;
654 			if (i > 12) {
655 				i = 1;
656 				tm->tm_yday -=
657 				    start_of_month[isleap(tm->tm_year +
658 				    TM_YEAR_BASE)][12];
659 				tm->tm_year++;
660 			}
661 			tm->tm_mon = i - 1;
662 			flags |= FLAG_MONTH;
663 		}
664 		if (!(flags & FLAG_MDAY)) {
665 			tm->tm_mday = tm->tm_yday -
666 			    start_of_month[isleap(tm->tm_year + TM_YEAR_BASE)]
667 			    [tm->tm_mon] + 1;
668 			flags |= FLAG_MDAY;
669 		}
670 		if (!(flags & FLAG_WDAY)) {
671 			i = 0;
672 			wday_offset = first_wday_of(tm->tm_year);
673 			while (i++ <= tm->tm_yday) {
674 				if (wday_offset++ >= 6)
675 					wday_offset = 0;
676 			}
677 			tm->tm_wday = wday_offset;
678 			flags |= FLAG_WDAY;
679 		}
680 	}
681 
682 	return ((char *)buf);
683 }
684 
685 char *
686 strptime_l(const char * __restrict buf, const char * __restrict fmt,
687     struct tm * __restrict tm, locale_t loc)
688 {
689 	char *ret;
690 	int gmt;
691 	FIX_LOCALE(loc);
692 
693 	gmt = 0;
694 	ret = _strptime(buf, fmt, tm, &gmt, loc);
695 	if (ret && gmt) {
696 		time_t t = timegm(tm);
697 
698 		localtime_r(&t, tm);
699 	}
700 
701 	return (ret);
702 }
703 
704 char *
705 strptime(const char * __restrict buf, const char * __restrict fmt,
706     struct tm * __restrict tm)
707 {
708 	return strptime_l(buf, fmt, tm, __get_locale());
709 }
710