xref: /illumos-gate/usr/src/lib/libc/port/locale/strptime.c (revision 20a7641f9918de8574b8b3b47dbe35c4bfc78df1)
1 /*
2  * Copyright (c) 2014 Gary Mills
3  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
4  * Copyright 2011, Nexenta Systems, Inc.  All rights reserved.
5  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer
16  *    in the documentation and/or other materials provided with the
17  *    distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
20  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
29  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  *
31  * The views and conclusions contained in the software and documentation
32  * are those of the authors and should not be interpreted as representing
33  * official policies, either expressed or implied, of Powerdog Industries.
34  */
35 
36 #include "lint.h"
37 #include <time.h>
38 #include <ctype.h>
39 #include <errno.h>
40 #include <stdlib.h>
41 #include <string.h>
42 #include <pthread.h>
43 #include <alloca.h>
44 #include <locale.h>
45 #include "timelocal.h"
46 #include "localeimpl.h"
47 
/* Element count of a true array; must not be applied to a pointer. */
#define	asizeof(a)	(sizeof (a) / sizeof (*(a)))

/* Bits kept in the flags word that is threaded through __strptime(). */
#define	F_GMT		0x01	/* parsed fields are UTC; localize at the end */
#define	F_RECURSE	0x04	/* the outermost __strptime() call has begun */
52 
53 static char *
54 __strptime(const char *_RESTRICT_KYWD buf, const char *_RESTRICT_KYWD fmt,
55     struct tm *_RESTRICT_KYWD tm, int *_RESTRICT_KYWD flagsp,
56     locale_t _RESTRICT_KYWD loc)
57 {
58 	char	c;
59 	const char *ptr;
60 	int	i, len, recurse = 0;
61 	int Ealternative, Oalternative;
62 	const struct lc_time *tptr = loc->time;
63 
64 	if (*flagsp & F_RECURSE)
65 		recurse = 1;
66 	*flagsp |= F_RECURSE;
67 
68 	ptr = fmt;
69 	while (*ptr != 0) {
70 		if (*buf == 0)
71 			break;
72 
73 		c = *ptr++;
74 
75 		if (c != '%') {
76 			if (isspace(c))
77 				while (isspace(*buf))
78 					buf++;
79 			else if (c != *buf++)
80 				return (NULL);
81 			continue;
82 		}
83 
84 		Ealternative = 0;
85 		Oalternative = 0;
86 label:
87 		c = *ptr++;
88 		switch (c) {
89 		case 0:
90 		case '%':
91 			if (*buf++ != '%')
92 				return (NULL);
93 			break;
94 
95 		case '+':
96 			buf = __strptime(buf, tptr->date_fmt, tm, flagsp, loc);
97 			if (buf == NULL)
98 				return (NULL);
99 			break;
100 
101 		case 'C':
102 			if (!isdigit(*buf))
103 				return (NULL);
104 
105 			/* XXX This will break for 3-digit centuries. */
106 			len = 2;
107 			for (i = 0; len && isdigit(*buf); buf++) {
108 				i *= 10;
109 				i += *buf - '0';
110 				len--;
111 			}
112 			if (i < 19)
113 				return (NULL);
114 
115 			tm->tm_year = i * 100 - 1900;
116 			break;
117 
118 		case 'c':
119 			buf = __strptime(buf, tptr->c_fmt, tm, flagsp, loc);
120 			if (buf == NULL)
121 				return (NULL);
122 			break;
123 
124 		case 'D':
125 			buf = __strptime(buf, "%m/%d/%y", tm, flagsp, loc);
126 			if (buf == NULL)
127 				return (NULL);
128 			break;
129 
130 		case 'E':
131 			if (Ealternative || Oalternative)
132 				break;
133 			Ealternative++;
134 			goto label;
135 
136 		case 'O':
137 			if (Ealternative || Oalternative)
138 				break;
139 			Oalternative++;
140 			goto label;
141 
142 		case 'F':
143 			buf = __strptime(buf, "%Y-%m-%d", tm, flagsp, loc);
144 			if (buf == NULL)
145 				return (NULL);
146 			break;
147 
148 		case 'R':
149 			buf = __strptime(buf, "%H:%M", tm, flagsp, loc);
150 			if (buf == NULL)
151 				return (NULL);
152 			break;
153 
154 		case 'r':
155 			buf = __strptime(buf, tptr->ampm_fmt, tm, flagsp, loc);
156 			if (buf == NULL)
157 				return (NULL);
158 			break;
159 
160 		case 'T':
161 			buf = __strptime(buf, "%H:%M:%S", tm, flagsp, loc);
162 			if (buf == NULL)
163 				return (NULL);
164 			break;
165 
166 		case 'X':
167 			buf = __strptime(buf, tptr->X_fmt, tm, flagsp, loc);
168 			if (buf == NULL)
169 				return (NULL);
170 			break;
171 
172 		case 'x':
173 			buf = __strptime(buf, tptr->x_fmt, tm, flagsp, loc);
174 			if (buf == NULL)
175 				return (NULL);
176 			break;
177 
178 		case 'j':
179 			if (!isdigit(*buf))
180 				return (NULL);
181 
182 			len = 3;
183 			for (i = 0; len && isdigit(*buf); buf++) {
184 				i *= 10;
185 				i += *buf - '0';
186 				len--;
187 			}
188 			if (i < 1 || i > 366)
189 				return (NULL);
190 
191 			tm->tm_yday = i - 1;
192 			break;
193 
194 		case 'M':
195 		case 'S':
196 			if (*buf == 0 || isspace(*buf))
197 				break;
198 
199 			if (!isdigit(*buf))
200 				return (NULL);
201 
202 			len = 2;
203 			for (i = 0; len && isdigit(*buf); buf++) {
204 				i *= 10;
205 				i += *buf - '0';
206 				len--;
207 			}
208 
209 			if (c == 'M') {
210 				if (i > 59)
211 					return (NULL);
212 				tm->tm_min = i;
213 			} else {
214 				if (i > 60)
215 					return (NULL);
216 				tm->tm_sec = i;
217 			}
218 
219 			break;
220 
221 		case 'H':
222 		case 'I':
223 		case 'k':
224 		case 'l':
225 			/*
226 			 * Of these, %l is the only specifier explicitly
227 			 * documented as not being zero-padded.  However,
228 			 * there is no harm in allowing zero-padding.
229 			 *
230 			 * XXX The %l specifier may gobble one too many
231 			 * digits if used incorrectly.
232 			 */
233 			if (!isdigit(*buf))
234 				return (NULL);
235 
236 			len = 2;
237 			for (i = 0; len && isdigit(*buf); buf++) {
238 				i *= 10;
239 				i += *buf - '0';
240 				len--;
241 			}
242 			if (c == 'H' || c == 'k') {
243 				if (i > 23)
244 					return (NULL);
245 			} else if (i > 12)
246 				return (NULL);
247 
248 			tm->tm_hour = i;
249 
250 			break;
251 
252 		case 'p':
253 			/*
254 			 * XXX This is bogus if parsed before hour-related
255 			 * specifiers.
256 			 */
257 			len = strlen(tptr->am);
258 			if (strncasecmp(buf, tptr->am, len) == 0) {
259 				if (tm->tm_hour > 12)
260 					return (NULL);
261 				if (tm->tm_hour == 12)
262 					tm->tm_hour = 0;
263 				buf += len;
264 				break;
265 			}
266 
267 			len = strlen(tptr->pm);
268 			if (strncasecmp(buf, tptr->pm, len) == 0) {
269 				if (tm->tm_hour > 12)
270 					return (NULL);
271 				if (tm->tm_hour != 12)
272 					tm->tm_hour += 12;
273 				buf += len;
274 				break;
275 			}
276 
277 			return (NULL);
278 
279 		case 'A':
280 		case 'a':
281 			for (i = 0; i < asizeof(tptr->weekday); i++) {
282 				len = strlen(tptr->weekday[i]);
283 				if (strncasecmp(buf, tptr->weekday[i], len) ==
284 				    0)
285 					break;
286 				len = strlen(tptr->wday[i]);
287 				if (strncasecmp(buf, tptr->wday[i], len) == 0)
288 					break;
289 			}
290 			if (i == asizeof(tptr->weekday))
291 				return (NULL);
292 
293 			tm->tm_wday = i;
294 			buf += len;
295 			break;
296 
297 		case 'U':
298 		case 'W':
299 			/*
300 			 * XXX This is bogus, as we can not assume any valid
301 			 * information present in the tm structure at this
302 			 * point to calculate a real value, so just check the
303 			 * range for now.
304 			 */
305 			if (!isdigit(*buf))
306 				return (NULL);
307 
308 			len = 2;
309 			for (i = 0; len && isdigit(*buf); buf++) {
310 				i *= 10;
311 				i += *buf - '0';
312 				len--;
313 			}
314 			if (i > 53)
315 				return (NULL);
316 
317 			break;
318 
319 		case 'w':
320 			if (!isdigit(*buf))
321 				return (NULL);
322 
323 			i = *buf - '0';
324 			if (i > 6)
325 				return (NULL);
326 
327 			tm->tm_wday = i;
328 
329 			break;
330 
331 		case 'd':
332 		case 'e':
333 			/*
334 			 * The %e format has a space before single digits
335 			 * which we need to skip.
336 			 */
337 			if (isspace(*buf))
338 				buf++;
339 			/*
340 			 * The %e specifier is explicitly documented as not
341 			 * being zero-padded but there is no harm in allowing
342 			 * such padding.
343 			 *
344 			 * XXX The %e specifier may gobble one too many
345 			 * digits if used incorrectly.
346 			 */
347 			if (!isdigit(*buf))
348 				return (NULL);
349 
350 			len = 2;
351 			for (i = 0; len && isdigit(*buf); buf++) {
352 				i *= 10;
353 				i += *buf - '0';
354 				len--;
355 			}
356 			if (i > 31)
357 				return (NULL);
358 
359 			tm->tm_mday = i;
360 
361 			break;
362 
363 		case 'B':
364 		case 'b':
365 		case 'h':
366 			for (i = 0; i < asizeof(tptr->month); i++) {
367 				len = strlen(tptr->month[i]);
368 				if (strncasecmp(buf, tptr->month[i], len) == 0)
369 					break;
370 			}
371 			/*
372 			 * Try the abbreviated month name if the full name
373 			 * wasn't found.
374 			 */
375 			if (i == asizeof(tptr->month)) {
376 				for (i = 0; i < asizeof(tptr->month); i++) {
377 					len = strlen(tptr->mon[i]);
378 					if (strncasecmp(buf, tptr->mon[i],
379 					    len) == 0)
380 						break;
381 				}
382 			}
383 			if (i == asizeof(tptr->month))
384 				return (NULL);
385 
386 			tm->tm_mon = i;
387 			buf += len;
388 			break;
389 
390 		case 'm':
391 			if (!isdigit(*buf))
392 				return (NULL);
393 
394 			len = 2;
395 			for (i = 0; len && isdigit(*buf); buf++) {
396 				i *= 10;
397 				i += *buf - '0';
398 				len--;
399 			}
400 			if (i < 1 || i > 12)
401 				return (NULL);
402 
403 			tm->tm_mon = i - 1;
404 
405 			break;
406 
407 		case 's':
408 			{
409 			char *cp;
410 			int sverrno;
411 			time_t t;
412 
413 			sverrno = errno;
414 			errno = 0;
415 			t = strtol(buf, &cp, 10);
416 			if (errno == ERANGE) {
417 				errno = sverrno;
418 				return (NULL);
419 			}
420 			errno = sverrno;
421 			buf = cp;
422 			(void) gmtime_r(&t, tm);
423 			*flagsp |= F_GMT;
424 			}
425 			break;
426 
427 		case 'Y':
428 		case 'y':
429 			if (*buf == '\0' || isspace(*buf))
430 				break;
431 
432 			if (!isdigit(*buf))
433 				return (NULL);
434 
435 			len = (c == 'Y') ? 4 : 2;
436 			for (i = 0; len && isdigit(*buf); buf++) {
437 				i *= 10;
438 				i += *buf - '0';
439 				len--;
440 			}
441 			if (c == 'Y')
442 				i -= 1900;
443 			if (c == 'y' && i < 69)
444 				i += 100;
445 			if (i < 0)
446 				return (NULL);
447 
448 			tm->tm_year = i;
449 
450 			break;
451 
452 		case 'Z':
453 			{
454 			const char *cp = buf;
455 			char *zonestr;
456 
457 			while (isupper(*cp))
458 				++cp;
459 			if (cp - buf) {
460 				zonestr = alloca(cp - buf + 1);
461 				(void) strncpy(zonestr, buf, cp - buf);
462 				zonestr[cp - buf] = '\0';
463 				tzset();
464 				if (strcmp(zonestr, "GMT") == 0) {
465 					*flagsp |= F_GMT;
466 				} else if (0 == strcmp(zonestr, tzname[0])) {
467 					tm->tm_isdst = 0;
468 				} else if (0 == strcmp(zonestr, tzname[1])) {
469 					tm->tm_isdst = 1;
470 				} else {
471 					return (NULL);
472 				}
473 				buf += cp - buf;
474 			}
475 			}
476 			break;
477 
478 		case 'z':
479 			{
480 			int sign = 1;
481 
482 			if (*buf != '+') {
483 				if (*buf == '-')
484 					sign = -1;
485 				else
486 					return (NULL);
487 			}
488 			buf++;
489 			i = 0;
490 			for (len = 4; len > 0; len--) {
491 				if (!isdigit(*buf))
492 					return (NULL);
493 				i *= 10;
494 				i += *buf - '0';
495 				buf++;
496 			}
497 
498 			tm->tm_hour -= sign * (i / 100);
499 			tm->tm_min -= sign * (i % 100);
500 			*flagsp |= F_GMT;
501 			}
502 			break;
503 		case 'n':
504 		case 't':
505 			while (isspace(*buf))
506 				buf++;
507 			break;
508 		}
509 	}
510 
511 	if (!recurse) {
512 		if (buf && (*flagsp & F_GMT)) {
513 			time_t t = timegm(tm);
514 			(void) localtime_r(&t, tm);
515 		}
516 	}
517 
518 	return ((char *)buf);
519 }
520 
521 char *
522 strptime(const char *_RESTRICT_KYWD buf, const char *_RESTRICT_KYWD fmt,
523     struct tm *_RESTRICT_KYWD tm)
524 {
525 	int	flags = 0;
526 
527 	(void) memset(tm, 0, sizeof (*tm));
528 
529 	return (__strptime(buf, fmt, tm, &flags, uselocale(NULL)));
530 }
531 
532 /*
533  * This is used by Solaris, and is a variant that does not clear the
534  * incoming tm.  It is triggered by -D_STRPTIME_DONTZERO.
535  */
536 char *
537 __strptime_dontzero(const char *_RESTRICT_KYWD buf,
538     const char *_RESTRICT_KYWD fmt, struct tm *_RESTRICT_KYWD tm)
539 {
540 	int	flags = 0;
541 
542 	return (__strptime(buf, fmt, tm, &flags, uselocale(NULL)));
543 }
544 
545 /*
546  * strptime_l is an extension that seems natural, and indeed, MacOS X
547  * includes it within their <xlocale.h> and it is part of GNU libc as well.
548  * For now we restrict it to the cases where strict namespaces are not
549  * included.  We expect to see it in a future version of POSIX.  locale_t is
550  * not a restrict, since the spec for it doesn't assume its a pointer.  We
551  * therefore pass it analagously to the way strftime_l is specified.
552  *
553  * We are not providing a non-zeroing version at this time.
554  */
555 char *
556 strptime_l(const char *_RESTRICT_KYWD buf, const char *_RESTRICT_KYWD fmt,
557     struct tm *_RESTRICT_KYWD tm, locale_t loc)
558 {
559 	int	flags =  0;
560 
561 	(void) memset(tm, 0, sizeof (*tm));
562 
563 	return (__strptime(buf, fmt, tm, &flags, loc));
564 }
565