xref: /illumos-gate/usr/src/lib/libc/port/locale/strptime.c (revision 83eaeac78ef2f69de16c2fecd3077c0ee9269743)
1 /*
2  * Copyright 2010, Nexenta Systems, Inc.  All rights reserved.
3  * Copyright (c) 1994 Powerdog Industries.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  *
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer
14  *    in the documentation and/or other materials provided with the
15  *    distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY POWERDOG INDUSTRIES ``AS IS'' AND ANY
18  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
20  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE POWERDOG INDUSTRIES BE
21  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
24  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
25  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
26  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
27  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * The views and conclusions contained in the software and documentation
30  * are those of the authors and should not be interpreted as representing
31  * official policies, either expressed or implied, of Powerdog Industries.
32  */
33 
34 #include "lint.h"
35 #include <time.h>
36 #include <ctype.h>
37 #include <errno.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <pthread.h>
41 #include <alloca.h>
42 #include "timelocal.h"
43 
/*
 * Number of elements in the array `a'.  The total size of the array is
 * divided by the size of one element, so `a' must be a real array and
 * not a pointer.
 */
#define	asizeof(a)	(sizeof (a) / sizeof (*(a)))
45 
46 static char *
47 __strptime(const char *buf, const char *fmt, struct tm *tm)
48 {
49 	char	c;
50 	const char *ptr;
51 	int	i, len;
52 	int Ealternative, Oalternative;
53 	struct lc_time_T *tptr = __get_current_time_locale();
54 
55 	ptr = fmt;
56 	while (*ptr != 0) {
57 		if (*buf == 0)
58 			break;
59 
60 		c = *ptr++;
61 
62 		if (c != '%') {
63 			if (isspace((unsigned char)c))
64 				while (*buf != 0 &&
65 				    isspace((unsigned char)*buf))
66 					buf++;
67 			else if (c != *buf++)
68 				return (0);
69 			continue;
70 		}
71 
72 		Ealternative = 0;
73 		Oalternative = 0;
74 label:
75 		c = *ptr++;
76 		switch (c) {
77 		case 0:
78 		case '%':
79 			if (*buf++ != '%')
80 				return (0);
81 			break;
82 
83 		case '+':
84 			buf = __strptime(buf, tptr->date_fmt, tm);
85 			if (buf == 0)
86 				return (0);
87 			break;
88 
89 		case 'C':
90 			if (!isdigit((unsigned char)*buf))
91 				return (0);
92 
93 			/* XXX This will break for 3-digit centuries. */
94 			len = 2;
95 			for (i = 0;
96 			    len && isdigit((unsigned char)*buf);
97 			    buf++) {
98 				i *= 10;
99 				i += *buf - '0';
100 				len--;
101 			}
102 			if (i < 19)
103 				return (0);
104 
105 			tm->tm_year = i * 100 - 1900;
106 			break;
107 
108 		case 'c':
109 			buf = __strptime(buf, tptr->c_fmt, tm);
110 			if (buf == 0)
111 				return (0);
112 			break;
113 
114 		case 'D':
115 			buf = __strptime(buf, "%m/%d/%y", tm);
116 			if (buf == 0)
117 				return (0);
118 			break;
119 
120 		case 'E':
121 			if (Ealternative || Oalternative)
122 				break;
123 			Ealternative++;
124 			goto label;
125 
126 		case 'O':
127 			if (Ealternative || Oalternative)
128 				break;
129 			Oalternative++;
130 			goto label;
131 
132 		case 'F':
133 			buf = __strptime(buf, "%Y-%m-%d", tm);
134 			if (buf == 0)
135 				return (0);
136 			break;
137 
138 		case 'R':
139 			buf = __strptime(buf, "%H:%M", tm);
140 			if (buf == 0)
141 				return (0);
142 			break;
143 
144 		case 'r':
145 			buf = __strptime(buf, tptr->ampm_fmt, tm);
146 			if (buf == 0)
147 				return (0);
148 			break;
149 
150 		case 'T':
151 			buf = __strptime(buf, "%H:%M:%S", tm);
152 			if (buf == 0)
153 				return (0);
154 			break;
155 
156 		case 'X':
157 			buf = __strptime(buf, tptr->X_fmt, tm);
158 			if (buf == 0)
159 				return (0);
160 			break;
161 
162 		case 'x':
163 			buf = __strptime(buf, tptr->x_fmt, tm);
164 			if (buf == 0)
165 				return (0);
166 			break;
167 
168 		case 'j':
169 			if (!isdigit((unsigned char)*buf))
170 				return (0);
171 
172 			len = 3;
173 			for (i = 0;
174 			    len && isdigit((unsigned char)*buf);
175 			    buf++) {
176 				i *= 10;
177 				i += *buf - '0';
178 				len--;
179 			}
180 			if (i < 1 || i > 366)
181 				return (0);
182 
183 			tm->tm_yday = i - 1;
184 			break;
185 
186 		case 'M':
187 		case 'S':
188 			if (*buf == 0 || isspace((unsigned char)*buf))
189 				break;
190 
191 			if (!isdigit((unsigned char)*buf))
192 				return (0);
193 
194 			len = 2;
195 			for (i = 0;
196 			    len && isdigit((unsigned char)*buf);
197 			    buf++) {
198 				i *= 10;
199 				i += *buf - '0';
200 				len--;
201 			}
202 
203 			if (c == 'M') {
204 				if (i > 59)
205 					return (0);
206 				tm->tm_min = i;
207 			} else {
208 				if (i > 60)
209 					return (0);
210 				tm->tm_sec = i;
211 			}
212 
213 			if (*buf != 0 && isspace((unsigned char)*buf))
214 				while (*ptr != 0 &&
215 				    !isspace((unsigned char)*ptr))
216 					ptr++;
217 			break;
218 
219 		case 'H':
220 		case 'I':
221 		case 'k':
222 		case 'l':
223 			/*
224 			 * Of these, %l is the only specifier explicitly
225 			 * documented as not being zero-padded.  However,
226 			 * there is no harm in allowing zero-padding.
227 			 *
228 			 * XXX The %l specifier may gobble one too many
229 			 * digits if used incorrectly.
230 			 */
231 			if (!isdigit((unsigned char)*buf))
232 				return (0);
233 
234 			len = 2;
235 			for (i = 0;
236 			    len && isdigit((unsigned char)*buf);
237 			    buf++) {
238 				i *= 10;
239 				i += *buf - '0';
240 				len--;
241 			}
242 			if (c == 'H' || c == 'k') {
243 				if (i > 23)
244 					return (0);
245 			} else if (i > 12)
246 				return (0);
247 
248 			tm->tm_hour = i;
249 
250 			if (*buf != 0 && isspace((unsigned char)*buf))
251 				while (*ptr != 0 &&
252 				    !isspace((unsigned char)*ptr))
253 					ptr++;
254 			break;
255 
256 		case 'p':
257 			/*
258 			 * XXX This is bogus if parsed before hour-related
259 			 * specifiers.
260 			 */
261 			len = strlen(tptr->am);
262 			if (strncasecmp(buf, tptr->am, len) == 0) {
263 				if (tm->tm_hour > 12)
264 					return (0);
265 				if (tm->tm_hour == 12)
266 					tm->tm_hour = 0;
267 				buf += len;
268 				break;
269 			}
270 
271 			len = strlen(tptr->pm);
272 			if (strncasecmp(buf, tptr->pm, len) == 0) {
273 				if (tm->tm_hour > 12)
274 					return (0);
275 				if (tm->tm_hour != 12)
276 					tm->tm_hour += 12;
277 				buf += len;
278 				break;
279 			}
280 
281 			return (0);
282 
283 		case 'A':
284 		case 'a':
285 			for (i = 0; i < asizeof(tptr->weekday); i++) {
286 				len = strlen(tptr->weekday[i]);
287 				if (strncasecmp(buf, tptr->weekday[i], len) ==
288 				    0)
289 					break;
290 				len = strlen(tptr->wday[i]);
291 				if (strncasecmp(buf, tptr->wday[i], len) == 0)
292 					break;
293 			}
294 			if (i == asizeof(tptr->weekday))
295 				return (0);
296 
297 			tm->tm_wday = i;
298 			buf += len;
299 			break;
300 
301 		case 'U':
302 		case 'W':
303 			/*
304 			 * XXX This is bogus, as we can not assume any valid
305 			 * information present in the tm structure at this
306 			 * point to calculate a real value, so just check the
307 			 * range for now.
308 			 */
309 			if (!isdigit((unsigned char)*buf))
310 				return (0);
311 
312 			len = 2;
313 			for (i = 0;
314 			    len && isdigit((unsigned char)*buf);
315 			    buf++) {
316 				i *= 10;
317 				i += *buf - '0';
318 				len--;
319 			}
320 			if (i > 53)
321 				return (0);
322 
323 			if (*buf != 0 && isspace((unsigned char)*buf))
324 				while (*ptr != 0 &&
325 				    !isspace((unsigned char)*ptr))
326 					ptr++;
327 			break;
328 
329 		case 'w':
330 			if (!isdigit((unsigned char)*buf))
331 				return (0);
332 
333 			i = *buf - '0';
334 			if (i > 6)
335 				return (0);
336 
337 			tm->tm_wday = i;
338 
339 			if (*buf != 0 && isspace((unsigned char)*buf))
340 				while (*ptr != 0 &&
341 				    !isspace((unsigned char)*ptr))
342 					ptr++;
343 			break;
344 
345 		case 'd':
346 		case 'e':
347 			/*
348 			 * The %e specifier is explicitly documented as not
349 			 * being zero-padded but there is no harm in allowing
350 			 * such padding.
351 			 *
352 			 * XXX The %e specifier may gobble one too many
353 			 * digits if used incorrectly.
354 			 */
355 			if (!isdigit((unsigned char)*buf))
356 				return (0);
357 
358 			len = 2;
359 			for (i = 0;
360 			    len && isdigit((unsigned char)*buf);
361 			    buf++) {
362 				i *= 10;
363 				i += *buf - '0';
364 				len--;
365 			}
366 			if (i > 31)
367 				return (0);
368 
369 			tm->tm_mday = i;
370 
371 			if (*buf != 0 && isspace((unsigned char)*buf))
372 				while (*ptr != 0 &&
373 				    !isspace((unsigned char)*ptr))
374 					ptr++;
375 			break;
376 
377 		case 'B':
378 		case 'b':
379 		case 'h':
380 			for (i = 0; i < asizeof(tptr->month); i++) {
381 				len = strlen(tptr->month[i]);
382 				if (strncasecmp(buf, tptr->month[i], len) == 0)
383 					break;
384 			}
385 			/*
386 			 * Try the abbreviated month name if the full name
387 			 * wasn't found.
388 			 */
389 			if (i == asizeof(tptr->month)) {
390 				for (i = 0; i < asizeof(tptr->month); i++) {
391 					len = strlen(tptr->mon[i]);
392 					if (strncasecmp(buf, tptr->mon[i],
393 					    len) == 0)
394 						break;
395 				}
396 			}
397 			if (i == asizeof(tptr->month))
398 				return (0);
399 
400 			tm->tm_mon = i;
401 			buf += len;
402 			break;
403 
404 		case 'm':
405 			if (!isdigit((unsigned char)*buf))
406 				return (0);
407 
408 			len = 2;
409 			for (i = 0;
410 			    len && isdigit((unsigned char)*buf);
411 			    buf++) {
412 				i *= 10;
413 				i += *buf - '0';
414 				len--;
415 			}
416 			if (i < 1 || i > 12)
417 				return (0);
418 
419 			tm->tm_mon = i - 1;
420 
421 			if (*buf != 0 && isspace((unsigned char)*buf))
422 				while (*ptr != 0 &&
423 				    !isspace((unsigned char)*ptr))
424 					ptr++;
425 			break;
426 
427 		case 'Y':
428 		case 'y':
429 			if (*buf == 0 || isspace((unsigned char)*buf))
430 				break;
431 
432 			if (!isdigit((unsigned char)*buf))
433 				return (0);
434 
435 			len = (c == 'Y') ? 4 : 2;
436 			for (i = 0;
437 			    len && isdigit((unsigned char)*buf);
438 			    buf++) {
439 				i *= 10;
440 				i += *buf - '0';
441 				len--;
442 			}
443 			if (c == 'Y')
444 				i -= 1900;
445 			if (c == 'y' && i < 69)
446 				i += 100;
447 			if (i < 0)
448 				return (0);
449 
450 			tm->tm_year = i;
451 
452 			if (*buf != 0 && isspace((unsigned char)*buf))
453 				while (*ptr != 0 &&
454 				    !isspace((unsigned char)*ptr))
455 					ptr++;
456 			break;
457 
458 		case 'Z':
459 			{
460 			const char *cp = buf;
461 			char *zonestr;
462 
463 			while (isupper((unsigned char)*cp))
464 				++cp;
465 			if (cp - buf) {
466 				zonestr = alloca(cp - buf + 1);
467 				(void) strncpy(zonestr, buf, cp - buf);
468 				zonestr[cp - buf] = '\0';
469 				tzset();
470 				/*
471 				 * Once upon a time this supported "GMT",
472 				 * for GMT, but we removed this as Solaris
473 				 * doesn't have it, and we lack the needed
474 				 * timegm function.
475 				 */
476 				if (0 == strcmp(zonestr, tzname[0])) {
477 					tm->tm_isdst = 0;
478 				} else if (0 == strcmp(zonestr, tzname[1])) {
479 					tm->tm_isdst = 1;
480 				} else {
481 					return (0);
482 				}
483 				buf += cp - buf;
484 			}
485 			}
486 			break;
487 
488 		/*
489 		 * Note that there used to be support %z and %s, but these
490 		 * are not supported by Solaris, so we have removed them.
491 		 * They would have required timegm() which is missing.
492 		 */
493 		}
494 	}
495 	return ((char *)buf);
496 }
497 
/*
 * Public entry point.  The caller's tm structure is zero-filled before
 * parsing, matching legacy Solaris strptime behaviour, and the result of
 * the shared __strptime() worker is handed back unchanged: a pointer to
 * the first unparsed character of `buf', or NULL when the input does not
 * match `fmt'.
 */
char *
strptime(const char *buf, const char *fmt, struct tm *tm)
{
	char *rest;

	(void) memset(tm, 0, sizeof (struct tm));
	rest = __strptime(buf, fmt, tm);

	return (rest);
}
506 
/*
 * Solaris variant of strptime() that leaves the incoming tm structure
 * as the caller filled it in; only the fields matched by `fmt' are
 * overwritten.  It is selected by building with -D_STRPTIME_DONTZERO.
 * The return value is that of the shared __strptime() worker.
 */
char *
__strptime_dontzero(const char *buf, const char *fmt, struct tm *tm)
{
	char *rest;

	rest = __strptime(buf, fmt, tm);

	return (rest);
}
516