xref: /freebsd/contrib/libarchive/libarchive/archive_parse_date.c (revision 401026e4825a05abba6f945cf1b74b3328876fa2)
1 /*
2  * This code is in the public domain and has no copyright.
3  *
4  * This is a plain C recursive-descent translation of an old
5  * public-domain YACC grammar that has been used for parsing dates in
6  * very many open-source projects.
7  *
8  * Since the original authors were generous enough to donate their
9  * work to the public domain, I feel compelled to match their
10  * generosity.
11  *
12  * Tim Kientzle, February 2009.
13  */
14 
15 /*
16  * Header comment from original getdate.y:
17  */
18 
19 /*
20 **  Originally written by Steven M. Bellovin <smb@research.att.com> while
21 **  at the University of North Carolina at Chapel Hill.  Later tweaked by
22 **  a couple of people on Usenet.  Completely overhauled by Rich $alz
23 **  <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990;
24 **
25 **  This grammar has 10 shift/reduce conflicts.
26 **
27 **  This code is in the public domain and has no copyright.
28 */
29 
#include "archive_platform.h"

#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "archive.h"
39 
/* Basic time units.  MINUTE, HOUR and DAY are lengths in seconds. */
#define	EPOCH		1970
#define	MINUTE		(60L)
#define	HOUR		(60L * MINUTE)
#define	DAY		(24L * HOUR)

/* Daylight-savings mode:  on, off, or not yet known. */
enum DSTMODE {
	DSTon,
	DSToff,
	DSTmaybe
};

/* Meridian:  am or pm. */
enum {
	tAM,
	tPM
};

/*
 * Token types returned by nexttoken().  Numbering starts at 260;
 * nexttoken() also returns plain characters as token values.
 */
enum {
	tAGO = 260,
	tDAY,
	tDAYZONE,
	tAMPM,
	tMONTH,
	tMONTH_UNIT,
	tSEC_UNIT,
	tUNUMBER,
	tZONE,
	tDST
};

/* One lexed token: its type plus the value attached to it. */
struct token {
	int	token;
	time_t	value;
};
54 
55 /*
56  * Parser state.
57  */
58 struct gdstate {
59 	struct token *tokenp; /* Pointer to next token. */
60 	/* HaveXxxx counts how many of this kind of phrase we've seen;
61 	 * it's a fatal error to have more than one time, zone, day,
62 	 * or date phrase. */
63 	int	HaveYear;
64 	int	HaveMonth;
65 	int	HaveDay;
66 	int	HaveWeekDay; /* Day of week */
67 	int	HaveTime; /* Hour/minute/second */
68 	int	HaveZone; /* timezone and/or DST info */
69 	int	HaveRel; /* time offset; we can have more than one */
70 	/* Absolute time values. */
71 	time_t	Timezone;  /* Seconds offset from GMT */
72 	time_t	Day;
73 	time_t	Hour;
74 	time_t	Minutes;
75 	time_t	Month;
76 	time_t	Seconds;
77 	time_t	Year;
78 	/* DST selection */
79 	enum DSTMODE	DSTmode;
80 	/* Day of week accounting, e.g., "3rd Tuesday" */
81 	time_t	DayOrdinal; /* "3" in "3rd Tuesday" */
82 	time_t	DayNumber; /* "Tuesday" in "3rd Tuesday" */
83 	/* Relative time values: hour/day/week offsets are measured in
84 	 * seconds, month/year are counted in months. */
85 	time_t	RelMonth;
86 	time_t	RelSeconds;
87 };
88 
89 /*
90  * A series of functions that recognize certain common time phrases.
91  * Each function returns 1 if it managed to make sense of some of the
92  * tokens, zero otherwise.
93  */
94 
95 /*
96  *  hour:minute or hour:minute:second with optional AM, PM, or numeric
97  *  timezone offset
98  */
99 static int
timephrase(struct gdstate * gds)100 timephrase(struct gdstate *gds)
101 {
102 	if (gds->tokenp[0].token == tUNUMBER
103 	    && gds->tokenp[1].token == ':'
104 	    && gds->tokenp[2].token == tUNUMBER
105 	    && gds->tokenp[3].token == ':'
106 	    && gds->tokenp[4].token == tUNUMBER) {
107 		/* "12:14:18" or "22:08:07" */
108 		++gds->HaveTime;
109 		gds->Hour = gds->tokenp[0].value;
110 		gds->Minutes = gds->tokenp[2].value;
111 		gds->Seconds = gds->tokenp[4].value;
112 		gds->tokenp += 5;
113 	}
114 	else if (gds->tokenp[0].token == tUNUMBER
115 	    && gds->tokenp[1].token == ':'
116 	    && gds->tokenp[2].token == tUNUMBER) {
117 		/* "12:14" or "22:08" */
118 		++gds->HaveTime;
119 		gds->Hour = gds->tokenp[0].value;
120 		gds->Minutes = gds->tokenp[2].value;
121 		gds->Seconds = 0;
122 		gds->tokenp += 3;
123 	}
124 	else if (gds->tokenp[0].token == tUNUMBER
125 	    && gds->tokenp[1].token == tAMPM) {
126 		/* "7" is a time if it's followed by "am" or "pm" */
127 		++gds->HaveTime;
128 		gds->Hour = gds->tokenp[0].value;
129 		gds->Minutes = gds->Seconds = 0;
130 		/* We'll handle the AM/PM below. */
131 		gds->tokenp += 1;
132 	} else {
133 		/* We can't handle this. */
134 		return 0;
135 	}
136 
137 	if (gds->tokenp[0].token == tAMPM) {
138 		/* "7:12pm", "12:20:13am" */
139 		if (gds->Hour == 12)
140 			gds->Hour = 0;
141 		if (gds->tokenp[0].value == tPM)
142 			gds->Hour += 12;
143 		gds->tokenp += 1;
144 	}
145 	if (gds->tokenp[0].token == '+'
146 	    && gds->tokenp[1].token == tUNUMBER) {
147 		/* "7:14+0700" */
148 		gds->HaveZone++;
149 		gds->DSTmode = DSToff;
150 		gds->Timezone = - ((gds->tokenp[1].value / 100) * HOUR
151 		    + (gds->tokenp[1].value % 100) * MINUTE);
152 		gds->tokenp += 2;
153 	}
154 	if (gds->tokenp[0].token == '-'
155 	    && gds->tokenp[1].token == tUNUMBER) {
156 		/* "19:14:12-0530" */
157 		gds->HaveZone++;
158 		gds->DSTmode = DSToff;
159 		gds->Timezone = + ((gds->tokenp[1].value / 100) * HOUR
160 		    + (gds->tokenp[1].value % 100) * MINUTE);
161 		gds->tokenp += 2;
162 	}
163 	return 1;
164 }
165 
166 /*
167  * Timezone name, possibly including DST.
168  */
169 static int
zonephrase(struct gdstate * gds)170 zonephrase(struct gdstate *gds)
171 {
172 	if (gds->tokenp[0].token == tZONE
173 	    && gds->tokenp[1].token == tDST) {
174 		gds->HaveZone++;
175 		gds->Timezone = gds->tokenp[0].value;
176 		gds->DSTmode = DSTon;
177 		gds->tokenp += 1;
178 		return 1;
179 	}
180 
181 	if (gds->tokenp[0].token == tZONE) {
182 		gds->HaveZone++;
183 		gds->Timezone = gds->tokenp[0].value;
184 		gds->DSTmode = DSToff;
185 		gds->tokenp += 1;
186 		return 1;
187 	}
188 
189 	if (gds->tokenp[0].token == tDAYZONE) {
190 		gds->HaveZone++;
191 		gds->Timezone = gds->tokenp[0].value;
192 		gds->DSTmode = DSTon;
193 		gds->tokenp += 1;
194 		return 1;
195 	}
196 	return 0;
197 }
198 
199 /*
200  * Year/month/day in various combinations.
201  */
202 static int
datephrase(struct gdstate * gds)203 datephrase(struct gdstate *gds)
204 {
205 	if (gds->tokenp[0].token == tUNUMBER
206 	    && gds->tokenp[1].token == '/'
207 	    && gds->tokenp[2].token == tUNUMBER
208 	    && gds->tokenp[3].token == '/'
209 	    && gds->tokenp[4].token == tUNUMBER) {
210 		gds->HaveYear++;
211 		gds->HaveMonth++;
212 		gds->HaveDay++;
213 		if (gds->tokenp[0].value >= 13) {
214 			/* First number is big:  2004/01/29, 99/02/17 */
215 			gds->Year = gds->tokenp[0].value;
216 			gds->Month = gds->tokenp[2].value;
217 			gds->Day = gds->tokenp[4].value;
218 		} else if ((gds->tokenp[4].value >= 13)
219 		    || (gds->tokenp[2].value >= 13)) {
220 			/* Last number is big:  01/07/98 */
221 			/* Middle number is big:  01/29/04 */
222 			gds->Month = gds->tokenp[0].value;
223 			gds->Day = gds->tokenp[2].value;
224 			gds->Year = gds->tokenp[4].value;
225 		} else {
226 			/* No significant clues: 02/03/04 */
227 			gds->Month = gds->tokenp[0].value;
228 			gds->Day = gds->tokenp[2].value;
229 			gds->Year = gds->tokenp[4].value;
230 		}
231 		gds->tokenp += 5;
232 		return 1;
233 	}
234 
235 	if (gds->tokenp[0].token == tUNUMBER
236 	    && gds->tokenp[1].token == '/'
237 	    && gds->tokenp[2].token == tUNUMBER) {
238 		/* "1/15" */
239 		gds->HaveMonth++;
240 		gds->HaveDay++;
241 		gds->Month = gds->tokenp[0].value;
242 		gds->Day = gds->tokenp[2].value;
243 		gds->tokenp += 3;
244 		return 1;
245 	}
246 
247 	if (gds->tokenp[0].token == tUNUMBER
248 	    && gds->tokenp[1].token == '-'
249 	    && gds->tokenp[2].token == tUNUMBER
250 	    && gds->tokenp[3].token == '-'
251 	    && gds->tokenp[4].token == tUNUMBER) {
252 		/* ISO 8601 format.  yyyy-mm-dd.  */
253 		gds->HaveYear++;
254 		gds->HaveMonth++;
255 		gds->HaveDay++;
256 		gds->Year = gds->tokenp[0].value;
257 		gds->Month = gds->tokenp[2].value;
258 		gds->Day = gds->tokenp[4].value;
259 		gds->tokenp += 5;
260 		return 1;
261 	}
262 
263 	if (gds->tokenp[0].token == tUNUMBER
264 	    && gds->tokenp[1].token == '-'
265 	    && gds->tokenp[2].token == tMONTH
266 	    && gds->tokenp[3].token == '-'
267 	    && gds->tokenp[4].token == tUNUMBER) {
268 		gds->HaveYear++;
269 		gds->HaveMonth++;
270 		gds->HaveDay++;
271 		if (gds->tokenp[0].value > 31) {
272 			/* e.g. 1992-Jun-17 */
273 			gds->Year = gds->tokenp[0].value;
274 			gds->Month = gds->tokenp[2].value;
275 			gds->Day = gds->tokenp[4].value;
276 		} else {
277 			/* e.g. 17-JUN-1992.  */
278 			gds->Day = gds->tokenp[0].value;
279 			gds->Month = gds->tokenp[2].value;
280 			gds->Year = gds->tokenp[4].value;
281 		}
282 		gds->tokenp += 5;
283 		return 1;
284 	}
285 
286 	if (gds->tokenp[0].token == tMONTH
287 	    && gds->tokenp[1].token == tUNUMBER
288 	    && gds->tokenp[2].token == ','
289 	    && gds->tokenp[3].token == tUNUMBER) {
290 		/* "June 17, 2001" */
291 		gds->HaveYear++;
292 		gds->HaveMonth++;
293 		gds->HaveDay++;
294 		gds->Month = gds->tokenp[0].value;
295 		gds->Day = gds->tokenp[1].value;
296 		gds->Year = gds->tokenp[3].value;
297 		gds->tokenp += 4;
298 		return 1;
299 	}
300 
301 	if (gds->tokenp[0].token == tMONTH
302 	    && gds->tokenp[1].token == tUNUMBER) {
303 		/* "May 3" */
304 		gds->HaveMonth++;
305 		gds->HaveDay++;
306 		gds->Month = gds->tokenp[0].value;
307 		gds->Day = gds->tokenp[1].value;
308 		gds->tokenp += 2;
309 		return 1;
310 	}
311 
312 	if (gds->tokenp[0].token == tUNUMBER
313 	    && gds->tokenp[1].token == tMONTH
314 	    && gds->tokenp[2].token == tUNUMBER) {
315 		/* "12 Sept 1997" */
316 		gds->HaveYear++;
317 		gds->HaveMonth++;
318 		gds->HaveDay++;
319 		gds->Day = gds->tokenp[0].value;
320 		gds->Month = gds->tokenp[1].value;
321 		gds->Year = gds->tokenp[2].value;
322 		gds->tokenp += 3;
323 		return 1;
324 	}
325 
326 	if (gds->tokenp[0].token == tUNUMBER
327 	    && gds->tokenp[1].token == tMONTH) {
328 		/* "12 Sept" */
329 		gds->HaveMonth++;
330 		gds->HaveDay++;
331 		gds->Day = gds->tokenp[0].value;
332 		gds->Month = gds->tokenp[1].value;
333 		gds->tokenp += 2;
334 		return 1;
335 	}
336 
337 	return 0;
338 }
339 
340 /*
341  * Relative time phrase: "tomorrow", "yesterday", "+1 hour", etc.
342  */
343 static int
relunitphrase(struct gdstate * gds)344 relunitphrase(struct gdstate *gds)
345 {
346 	if (gds->tokenp[0].token == '-'
347 	    && gds->tokenp[1].token == tUNUMBER
348 	    && gds->tokenp[2].token == tSEC_UNIT) {
349 		/* "-3 hours" */
350 		gds->HaveRel++;
351 		gds->RelSeconds -= gds->tokenp[1].value * gds->tokenp[2].value;
352 		gds->tokenp += 3;
353 		return 1;
354 	}
355 	if (gds->tokenp[0].token == '+'
356 	    && gds->tokenp[1].token == tUNUMBER
357 	    && gds->tokenp[2].token == tSEC_UNIT) {
358 		/* "+1 minute" */
359 		gds->HaveRel++;
360 		gds->RelSeconds += gds->tokenp[1].value * gds->tokenp[2].value;
361 		gds->tokenp += 3;
362 		return 1;
363 	}
364 	if (gds->tokenp[0].token == tUNUMBER
365 	    && gds->tokenp[1].token == tSEC_UNIT) {
366 		/* "1 day" */
367 		gds->HaveRel++;
368 		gds->RelSeconds += gds->tokenp[0].value * gds->tokenp[1].value;
369 		gds->tokenp += 2;
370 		return 1;
371 	}
372 	if (gds->tokenp[0].token == '-'
373 	    && gds->tokenp[1].token == tUNUMBER
374 	    && gds->tokenp[2].token == tMONTH_UNIT) {
375 		/* "-3 months" */
376 		gds->HaveRel++;
377 		gds->RelMonth -= gds->tokenp[1].value * gds->tokenp[2].value;
378 		gds->tokenp += 3;
379 		return 1;
380 	}
381 	if (gds->tokenp[0].token == '+'
382 	    && gds->tokenp[1].token == tUNUMBER
383 	    && gds->tokenp[2].token == tMONTH_UNIT) {
384 		/* "+5 years" */
385 		gds->HaveRel++;
386 		gds->RelMonth += gds->tokenp[1].value * gds->tokenp[2].value;
387 		gds->tokenp += 3;
388 		return 1;
389 	}
390 	if (gds->tokenp[0].token == tUNUMBER
391 	    && gds->tokenp[1].token == tMONTH_UNIT) {
392 		/* "2 years" */
393 		gds->HaveRel++;
394 		gds->RelMonth += gds->tokenp[0].value * gds->tokenp[1].value;
395 		gds->tokenp += 2;
396 		return 1;
397 	}
398 	if (gds->tokenp[0].token == tSEC_UNIT) {
399 		/* "now", "tomorrow" */
400 		gds->HaveRel++;
401 		gds->RelSeconds += gds->tokenp[0].value;
402 		gds->tokenp += 1;
403 		return 1;
404 	}
405 	if (gds->tokenp[0].token == tMONTH_UNIT) {
406 		/* "month" */
407 		gds->HaveRel++;
408 		gds->RelMonth += gds->tokenp[0].value;
409 		gds->tokenp += 1;
410 		return 1;
411 	}
412 	return 0;
413 }
414 
415 /*
416  * Day of the week specification.
417  */
418 static int
dayphrase(struct gdstate * gds)419 dayphrase(struct gdstate *gds)
420 {
421 	if (gds->tokenp[0].token == tDAY) {
422 		/* "tues", "wednesday," */
423 		gds->HaveWeekDay++;
424 		gds->DayOrdinal = 1;
425 		gds->DayNumber = gds->tokenp[0].value;
426 		gds->tokenp += 1;
427 		if (gds->tokenp[0].token == ',')
428 			gds->tokenp += 1;
429 		return 1;
430 	}
431 	if (gds->tokenp[0].token == tUNUMBER
432 		&& gds->tokenp[1].token == tDAY) {
433 		/* "second tues" "3 wed" */
434 		gds->HaveWeekDay++;
435 		gds->DayOrdinal = gds->tokenp[0].value;
436 		gds->DayNumber = gds->tokenp[1].value;
437 		gds->tokenp += 2;
438 		return 1;
439 	}
440 	return 0;
441 }
442 
443 /*
444  * Try to match a phrase using one of the above functions.
445  * This layer also deals with a couple of generic issues.
446  */
447 static int
phrase(struct gdstate * gds)448 phrase(struct gdstate *gds)
449 {
450 	if (timephrase(gds))
451 		return 1;
452 	if (zonephrase(gds))
453 		return 1;
454 	if (datephrase(gds))
455 		return 1;
456 	if (dayphrase(gds))
457 		return 1;
458 	if (relunitphrase(gds)) {
459 		if (gds->tokenp[0].token == tAGO) {
460 			gds->RelSeconds = -gds->RelSeconds;
461 			gds->RelMonth = -gds->RelMonth;
462 			gds->tokenp += 1;
463 		}
464 		return 1;
465 	}
466 
467 	/* Bare numbers sometimes have meaning. */
468 	if (gds->tokenp[0].token == tUNUMBER) {
469 		if (gds->HaveTime && !gds->HaveYear && !gds->HaveRel) {
470 			gds->HaveYear++;
471 			gds->Year = gds->tokenp[0].value;
472 			gds->tokenp += 1;
473 			return 1;
474 		}
475 
476 		if(gds->tokenp[0].value > 10000) {
477 			/* "20040301" */
478 			gds->HaveYear++;
479 			gds->HaveMonth++;
480 			gds->HaveDay++;
481 			gds->Day= (gds->tokenp[0].value)%100;
482 			gds->Month= (gds->tokenp[0].value/100)%100;
483 			gds->Year = gds->tokenp[0].value/10000;
484 			gds->tokenp += 1;
485 			return 1;
486 		}
487 
488 		if (gds->tokenp[0].value < 24) {
489 			gds->HaveTime++;
490 			gds->Hour = gds->tokenp[0].value;
491 			gds->Minutes = 0;
492 			gds->Seconds = 0;
493 			gds->tokenp += 1;
494 			return 1;
495 		}
496 
497 		if ((gds->tokenp[0].value / 100 < 24)
498 		    && (gds->tokenp[0].value % 100 < 60)) {
499 			/* "513" is same as "5:13" */
500 			gds->Hour = gds->tokenp[0].value / 100;
501 			gds->Minutes = gds->tokenp[0].value % 100;
502 			gds->Seconds = 0;
503 			gds->tokenp += 1;
504 			return 1;
505 		}
506 	}
507 
508 	return 0;
509 }
510 
511 /*
512  * A dictionary of time words.
513  */
514 static struct LEXICON {
515 	size_t		abbrev;
516 	const char	*name;
517 	int		type;
518 	time_t		value;
519 } const TimeWords[] = {
520 	/* am/pm */
521 	{ 0, "am",		tAMPM,	tAM },
522 	{ 0, "pm",		tAMPM,	tPM },
523 
524 	/* Month names. */
525 	{ 3, "january",		tMONTH,  1 },
526 	{ 3, "february",	tMONTH,  2 },
527 	{ 3, "march",		tMONTH,  3 },
528 	{ 3, "april",		tMONTH,  4 },
529 	{ 3, "may",		tMONTH,  5 },
530 	{ 3, "june",		tMONTH,  6 },
531 	{ 3, "july",		tMONTH,  7 },
532 	{ 3, "august",		tMONTH,  8 },
533 	{ 3, "september",	tMONTH,  9 },
534 	{ 3, "october",		tMONTH, 10 },
535 	{ 3, "november",	tMONTH, 11 },
536 	{ 3, "december",	tMONTH, 12 },
537 
538 	/* Days of the week. */
539 	{ 2, "sunday",		tDAY, 0 },
540 	{ 3, "monday",		tDAY, 1 },
541 	{ 2, "tuesday",		tDAY, 2 },
542 	{ 3, "wednesday",	tDAY, 3 },
543 	{ 2, "thursday",	tDAY, 4 },
544 	{ 2, "friday",		tDAY, 5 },
545 	{ 2, "saturday",	tDAY, 6 },
546 
547 	/* Timezones: Offsets are in seconds. */
548 	{ 0, "gmt",  tZONE,     0*HOUR }, /* Greenwich Mean */
549 	{ 0, "ut",   tZONE,     0*HOUR }, /* Universal (Coordinated) */
550 	{ 0, "utc",  tZONE,     0*HOUR },
551 	{ 0, "wet",  tZONE,     0*HOUR }, /* Western European */
552 	{ 0, "bst",  tDAYZONE,  0*HOUR }, /* British Summer */
553 	{ 0, "wat",  tZONE,     1*HOUR }, /* West Africa */
554 	{ 0, "at",   tZONE,     2*HOUR }, /* Azores */
555 	/* { 0, "bst", tZONE, 3*HOUR }, */ /* Brazil Standard: Conflict */
556 	/* { 0, "gst", tZONE, 3*HOUR }, */ /* Greenland Standard: Conflict*/
557 	{ 0, "nft",  tZONE,     3*HOUR+30*MINUTE }, /* Newfoundland */
558 	{ 0, "nst",  tZONE,     3*HOUR+30*MINUTE }, /* Newfoundland Standard */
559 	{ 0, "ndt",  tDAYZONE,  3*HOUR+30*MINUTE }, /* Newfoundland Daylight */
560 	{ 0, "ast",  tZONE,     4*HOUR }, /* Atlantic Standard */
561 	{ 0, "adt",  tDAYZONE,  4*HOUR }, /* Atlantic Daylight */
562 	{ 0, "est",  tZONE,     5*HOUR }, /* Eastern Standard */
563 	{ 0, "edt",  tDAYZONE,  5*HOUR }, /* Eastern Daylight */
564 	{ 0, "cst",  tZONE,     6*HOUR }, /* Central Standard */
565 	{ 0, "cdt",  tDAYZONE,  6*HOUR }, /* Central Daylight */
566 	{ 0, "mst",  tZONE,     7*HOUR }, /* Mountain Standard */
567 	{ 0, "mdt",  tDAYZONE,  7*HOUR }, /* Mountain Daylight */
568 	{ 0, "pst",  tZONE,     8*HOUR }, /* Pacific Standard */
569 	{ 0, "pdt",  tDAYZONE,  8*HOUR }, /* Pacific Daylight */
570 	{ 0, "yst",  tZONE,     9*HOUR }, /* Yukon Standard */
571 	{ 0, "ydt",  tDAYZONE,  9*HOUR }, /* Yukon Daylight */
572 	{ 0, "hst",  tZONE,     10*HOUR }, /* Hawaii Standard */
573 	{ 0, "hdt",  tDAYZONE,  10*HOUR }, /* Hawaii Daylight */
574 	{ 0, "cat",  tZONE,     10*HOUR }, /* Central Alaska */
575 	{ 0, "ahst", tZONE,     10*HOUR }, /* Alaska-Hawaii Standard */
576 	{ 0, "nt",   tZONE,     11*HOUR }, /* Nome */
577 	{ 0, "idlw", tZONE,     12*HOUR }, /* Intl Date Line West */
578 	{ 0, "cet",  tZONE,     -1*HOUR }, /* Central European */
579 	{ 0, "met",  tZONE,     -1*HOUR }, /* Middle European */
580 	{ 0, "mewt", tZONE,     -1*HOUR }, /* Middle European Winter */
581 	{ 0, "mest", tDAYZONE,  -1*HOUR }, /* Middle European Summer */
582 	{ 0, "swt",  tZONE,     -1*HOUR }, /* Swedish Winter */
583 	{ 0, "sst",  tDAYZONE,  -1*HOUR }, /* Swedish Summer */
584 	{ 0, "fwt",  tZONE,     -1*HOUR }, /* French Winter */
585 	{ 0, "fst",  tDAYZONE,  -1*HOUR }, /* French Summer */
586 	{ 0, "eet",  tZONE,     -2*HOUR }, /* Eastern Eur, USSR Zone 1 */
587 	{ 0, "bt",   tZONE,     -3*HOUR }, /* Baghdad, USSR Zone 2 */
588 	{ 0, "it",   tZONE,     -3*HOUR-30*MINUTE },/* Iran */
589 	{ 0, "zp4",  tZONE,     -4*HOUR }, /* USSR Zone 3 */
590 	{ 0, "zp5",  tZONE,     -5*HOUR }, /* USSR Zone 4 */
591 	{ 0, "ist",  tZONE,     -5*HOUR-30*MINUTE },/* Indian Standard */
592 	{ 0, "zp6",  tZONE,     -6*HOUR }, /* USSR Zone 5 */
593 	/* { 0, "nst",  tZONE, -6.5*HOUR }, */ /* North Sumatra: Conflict */
594 	/* { 0, "sst", tZONE, -7*HOUR }, */ /* So Sumatra, USSR 6: Conflict */
595 	{ 0, "wast", tZONE,     -7*HOUR }, /* West Australian Standard */
596 	{ 0, "wadt", tDAYZONE,  -7*HOUR }, /* West Australian Daylight */
597 	{ 0, "jt",   tZONE,     -7*HOUR-30*MINUTE },/* Java (3pm in Cronusland!)*/
598 	{ 0, "cct",  tZONE,     -8*HOUR }, /* China Coast, USSR Zone 7 */
599 	{ 0, "jst",  tZONE,     -9*HOUR }, /* Japan Std, USSR Zone 8 */
600 	{ 0, "cast", tZONE,     -9*HOUR-30*MINUTE },/* Ctrl Australian Std */
601 	{ 0, "cadt", tDAYZONE,  -9*HOUR-30*MINUTE },/* Ctrl Australian Daylt */
602 	{ 0, "east", tZONE,     -10*HOUR }, /* Eastern Australian Std */
603 	{ 0, "eadt", tDAYZONE,  -10*HOUR }, /* Eastern Australian Daylt */
604 	{ 0, "gst",  tZONE,     -10*HOUR }, /* Guam Std, USSR Zone 9 */
605 	{ 0, "nzt",  tZONE,     -12*HOUR }, /* New Zealand */
606 	{ 0, "nzst", tZONE,     -12*HOUR }, /* New Zealand Standard */
607 	{ 0, "nzdt", tDAYZONE,  -12*HOUR }, /* New Zealand Daylight */
608 	{ 0, "idle", tZONE,     -12*HOUR }, /* Intl Date Line East */
609 
610 	{ 0, "dst",  tDST,		0 },
611 
612 	/* Time units. */
613 	{ 4, "years",		tMONTH_UNIT,	12 },
614 	{ 5, "months",		tMONTH_UNIT,	1 },
615 	{ 9, "fortnights",	tSEC_UNIT,	14 * DAY },
616 	{ 4, "weeks",		tSEC_UNIT,	7 * DAY },
617 	{ 3, "days",		tSEC_UNIT,	DAY },
618 	{ 4, "hours",		tSEC_UNIT,	HOUR },
619 	{ 3, "minutes",		tSEC_UNIT,	MINUTE },
620 	{ 3, "seconds",		tSEC_UNIT,	1 },
621 
622 	/* Relative-time words. */
623 	{ 0, "tomorrow",	tSEC_UNIT,	DAY },
624 	{ 0, "yesterday",	tSEC_UNIT,	-DAY },
625 	{ 0, "today",		tSEC_UNIT,	0 },
626 	{ 0, "now",		tSEC_UNIT,	0 },
627 	{ 0, "last",		tUNUMBER,	-1 },
628 	{ 0, "this",		tSEC_UNIT,	0 },
629 	{ 0, "next",		tUNUMBER,	2 },
630 	{ 0, "first",		tUNUMBER,	1 },
631 	{ 0, "1st",		tUNUMBER,	1 },
632 /*	{ 0, "second",		tUNUMBER,	2 }, */
633 	{ 0, "2nd",		tUNUMBER,	2 },
634 	{ 0, "third",		tUNUMBER,	3 },
635 	{ 0, "3rd",		tUNUMBER,	3 },
636 	{ 0, "fourth",		tUNUMBER,	4 },
637 	{ 0, "4th",		tUNUMBER,	4 },
638 	{ 0, "fifth",		tUNUMBER,	5 },
639 	{ 0, "5th",		tUNUMBER,	5 },
640 	{ 0, "sixth",		tUNUMBER,	6 },
641 	{ 0, "seventh",		tUNUMBER,	7 },
642 	{ 0, "eighth",		tUNUMBER,	8 },
643 	{ 0, "ninth",		tUNUMBER,	9 },
644 	{ 0, "tenth",		tUNUMBER,	10 },
645 	{ 0, "eleventh",	tUNUMBER,	11 },
646 	{ 0, "twelfth",		tUNUMBER,	12 },
647 	{ 0, "ago",		tAGO,		1 },
648 
649 	/* Military timezones. */
650 	{ 0, "a",	tZONE,	1*HOUR },
651 	{ 0, "b",	tZONE,	2*HOUR },
652 	{ 0, "c",	tZONE,	3*HOUR },
653 	{ 0, "d",	tZONE,	4*HOUR },
654 	{ 0, "e",	tZONE,	5*HOUR },
655 	{ 0, "f",	tZONE,	6*HOUR },
656 	{ 0, "g",	tZONE,	7*HOUR },
657 	{ 0, "h",	tZONE,	8*HOUR },
658 	{ 0, "i",	tZONE,	9*HOUR },
659 	{ 0, "k",	tZONE,	10*HOUR },
660 	{ 0, "l",	tZONE,	11*HOUR },
661 	{ 0, "m",	tZONE,	12*HOUR },
662 	{ 0, "n",	tZONE,	-1*HOUR },
663 	{ 0, "o",	tZONE,	-2*HOUR },
664 	{ 0, "p",	tZONE,	-3*HOUR },
665 	{ 0, "q",	tZONE,	-4*HOUR },
666 	{ 0, "r",	tZONE,	-5*HOUR },
667 	{ 0, "s",	tZONE,	-6*HOUR },
668 	{ 0, "t",	tZONE,	-7*HOUR },
669 	{ 0, "u",	tZONE,	-8*HOUR },
670 	{ 0, "v",	tZONE,	-9*HOUR },
671 	{ 0, "w",	tZONE,	-10*HOUR },
672 	{ 0, "x",	tZONE,	-11*HOUR },
673 	{ 0, "y",	tZONE,	-12*HOUR },
674 	{ 0, "z",	tZONE,	0*HOUR },
675 
676 	/* End of table. */
677 	{ 0, NULL,	0,	0 }
678 };
679 
680 /*
681  * Year is either:
682  *  = A number from 0 to 99, which means a year from 1970 to 2069, or
683  *  = The actual year (>=100).
684  */
685 static time_t
Convert(time_t Month,time_t Day,time_t Year,time_t Hours,time_t Minutes,time_t Seconds,time_t Timezone,enum DSTMODE DSTmode)686 Convert(time_t Month, time_t Day, time_t Year,
687 	time_t Hours, time_t Minutes, time_t Seconds,
688 	time_t Timezone, enum DSTMODE DSTmode)
689 {
690 	signed char DaysInMonth[12] = {
691 		31, 0, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
692 	};
693 	time_t		Julian;
694 	int		i;
695 	struct tm	*ltime;
696 #if defined(HAVE_LOCALTIME_R) || defined(HAVE_LOCALTIME_S)
697 	struct tm	tmbuf;
698 #endif
699 
700 	if (Year < 69)
701 		Year += 2000;
702 	else if (Year < 100)
703 		Year += 1900;
704 	DaysInMonth[1] = Year % 4 == 0 && (Year % 100 != 0 || Year % 400 == 0)
705 	    ? 29 : 28;
706 	if (Year < EPOCH || (sizeof(time_t) <= 4 && Year >= 2038)
707 	    || Month < 1 || Month > 12
708 	    /* Lint fluff:  "conversion from long may lose accuracy" */
709 	    || Day < 1 || Day > DaysInMonth[(int)--Month]
710 	    || Hours < 0 || Hours > 23
711 	    || Minutes < 0 || Minutes > 59
712 	    || Seconds < 0 || Seconds > 59)
713 		return -1;
714 
715 	Julian = Day - 1;
716 	for (i = 0; i < Month; i++)
717 		Julian += DaysInMonth[i];
718 	for (i = EPOCH; i < Year; i++)
719 		Julian += 365 + (i % 4 == 0);
720 	Julian *= DAY;
721 	Julian += Timezone;
722 	Julian += Hours * HOUR + Minutes * MINUTE + Seconds;
723 #if defined(HAVE_LOCALTIME_S)
724 	ltime = localtime_s(&tmbuf, &Julian) ? NULL : &tmbuf;
725 #elif defined(HAVE_LOCALTIME_R)
726 	ltime = localtime_r(&Julian, &tmbuf);
727 #else
728 	ltime = localtime(&Julian);
729 #endif
730 	if (DSTmode == DSTon
731 	    || (DSTmode == DSTmaybe && ltime->tm_isdst))
732 		Julian -= HOUR;
733 	return Julian;
734 }
735 
736 static time_t
DSTcorrect(time_t Start,time_t Future)737 DSTcorrect(time_t Start, time_t Future)
738 {
739 	time_t		StartDay;
740 	time_t		FutureDay;
741 	struct tm	*ltime;
742 #if defined(HAVE_LOCALTIME_R) || defined(HAVE_LOCALTIME_S)
743 	struct tm	tmbuf;
744 #endif
745 #if defined(HAVE_LOCALTIME_S)
746 	ltime = localtime_s(&tmbuf, &Start) ? NULL : &tmbuf;
747 #elif defined(HAVE_LOCALTIME_R)
748 	ltime = localtime_r(&Start, &tmbuf);
749 #else
750 	ltime = localtime(&Start);
751 #endif
752 	StartDay = (ltime->tm_hour + 1) % 24;
753 #if defined(HAVE_LOCALTIME_S)
754 	ltime = localtime_s(&tmbuf, &Future) ? NULL : &tmbuf;
755 #elif defined(HAVE_LOCALTIME_R)
756 	ltime = localtime_r(&Future, &tmbuf);
757 #else
758 	ltime = localtime(&Future);
759 #endif
760 	FutureDay = (ltime->tm_hour + 1) % 24;
761 	return (Future - Start) + (StartDay - FutureDay) * HOUR;
762 }
763 
764 
765 static time_t
RelativeDate(time_t Start,time_t zone,int dstmode,time_t DayOrdinal,time_t DayNumber)766 RelativeDate(time_t Start, time_t zone, int dstmode,
767     time_t DayOrdinal, time_t DayNumber)
768 {
769 	struct tm	*tm;
770 	time_t	t, now;
771 #if defined(HAVE_GMTIME_R) || defined(HAVE_GMTIME_S)
772 	struct tm	tmbuf;
773 #endif
774 
775 	t = Start - zone;
776 #if defined(HAVE_GMTIME_S)
777 	tm = gmtime_s(&tmbuf, &t) ? NULL : &tmbuf;
778 #elif defined(HAVE_GMTIME_R)
779 	tm = gmtime_r(&t, &tmbuf);
780 #else
781 	tm = gmtime(&t);
782 #endif
783 	now = Start;
784 	now += DAY * ((DayNumber - tm->tm_wday + 7) % 7);
785 	now += 7 * DAY * (DayOrdinal <= 0 ? DayOrdinal : DayOrdinal - 1);
786 	if (dstmode == DSTmaybe)
787 		return DSTcorrect(Start, now);
788 	return now - Start;
789 }
790 
791 
792 static time_t
RelativeMonth(time_t Start,time_t Timezone,time_t RelMonth)793 RelativeMonth(time_t Start, time_t Timezone, time_t RelMonth)
794 {
795 	struct tm	*tm;
796 	time_t	Month;
797 	time_t	Year;
798 #if defined(HAVE_LOCALTIME_R) || defined(HAVE_LOCALTIME_S)
799 	struct tm	tmbuf;
800 #endif
801 
802 	if (RelMonth == 0)
803 		return 0;
804 #if defined(HAVE_LOCALTIME_S)
805 	tm = localtime_s(&tmbuf, &Start) ? NULL : &tmbuf;
806 #elif defined(HAVE_LOCALTIME_R)
807 	tm = localtime_r(&Start, &tmbuf);
808 #else
809 	tm = localtime(&Start);
810 #endif
811 	Month = 12 * (tm->tm_year + 1900) + tm->tm_mon + RelMonth;
812 	Year = Month / 12;
813 	Month = Month % 12 + 1;
814 	return DSTcorrect(Start,
815 	    Convert(Month, (time_t)tm->tm_mday, Year,
816 		(time_t)tm->tm_hour, (time_t)tm->tm_min, (time_t)tm->tm_sec,
817 		Timezone, DSTmaybe));
818 }
819 
/*
 * Parses and consumes an unsigned decimal number.
 *
 * On success *in is advanced to the first character after the digits,
 * *value receives the number and 1 is returned.  A number too large
 * for time_t is clamped to the largest value time_t can hold (all of
 * its digits are still consumed).  If *in does not start with a
 * digit, 0 is returned and both *in and *value are unchanged.
 */
static char
consume_unsigned_number(const char **in, time_t *value)
{
	const char *p = *in;
	time_t limit;
	time_t acc = 0;
	int saturated = 0;

	if (!isdigit((unsigned char)*p))
		return 0;

	/* Largest time_t, whether the type is unsigned or signed. */
	if ((time_t)-1 > (time_t)0)
		limit = (time_t)-1;
	else
		limit = (time_t)((1ULL << (sizeof(time_t) * CHAR_BIT - 1))
		    - 1ULL);

	for (; isdigit((unsigned char)*p); p++) {
		int digit = *p - '0';

		/* acc * 10 + digit <= limit, tested without overflow. */
		if (saturated || acc > (limit - digit) / 10) {
			saturated = 1;
			acc = limit;
		} else {
			acc = 10 * acc + digit;
		}
	}

	*in = p;
	*value = acc;
	return 1;
}
836 
837 /*
838  * Tokenizer.
839  */
840 static int
nexttoken(const char ** in,time_t * value)841 nexttoken(const char **in, time_t *value)
842 {
843 	char	c;
844 	char	buff[64];
845 
846 	for ( ; ; ) {
847 		while (isspace((unsigned char)**in))
848 			++*in;
849 
850 		/* Skip parenthesized comments. */
851 		if (**in == '(') {
852 			int Count = 0;
853 			do {
854 				c = *(*in)++;
855 				if (c == '\0')
856 					return c;
857 				if (c == '(')
858 					Count++;
859 				else if (c == ')')
860 					Count--;
861 			} while (Count > 0);
862 			continue;
863 		}
864 
865 		/* Try the next token in the word table first. */
866 		/* This allows us to match "2nd", for example. */
867 		{
868 			const char *src = *in;
869 			const struct LEXICON *tp;
870 			unsigned i = 0;
871 
872 			/* Force to lowercase and strip '.' characters. */
873 			while (*src != '\0'
874 			    && (isalnum((unsigned char)*src) || *src == '.')
875 			    && i < sizeof(buff)-1) {
876 				if (*src != '.') {
877 					if (isupper((unsigned char)*src))
878 						buff[i++] = (char)tolower(
879 						    (unsigned char)*src);
880 					else
881 						buff[i++] = *src;
882 				}
883 				src++;
884 			}
885 			buff[i] = '\0';
886 
887 			/*
888 			 * Find the first match.  If the word can be
889 			 * abbreviated, make sure we match at least
890 			 * the minimum abbreviation.
891 			 */
892 			for (tp = TimeWords; tp->name; tp++) {
893 				size_t abbrev = tp->abbrev;
894 				if (abbrev == 0)
895 					abbrev = strlen(tp->name);
896 				if (strlen(buff) >= abbrev
897 				    && strncmp(tp->name, buff, strlen(buff))
898 				    	== 0) {
899 					/* Skip over token. */
900 					*in = src;
901 					/* Return the match. */
902 					*value = tp->value;
903 					return tp->type;
904 				}
905 			}
906 		}
907 
908 		/*
909 		 * Not in the word table, maybe it's a number.  Note:
910 		 * Because '-' and '+' have other special meanings, I
911 		 * don't deal with signed numbers here.
912 		 */
913 		if (consume_unsigned_number(in, value)) {
914 			return (tUNUMBER);
915 		}
916 
917 		return *(*in)++;
918 	}
919 }
920 
921 #define	TM_YEAR_ORIGIN 1900
922 
923 /* Yield A - B, measured in seconds.  */
924 static long
difftm(struct tm * a,struct tm * b)925 difftm (struct tm *a, struct tm *b)
926 {
927 	int ay = a->tm_year + (TM_YEAR_ORIGIN - 1);
928 	int by = b->tm_year + (TM_YEAR_ORIGIN - 1);
929 	long days = (
930 		/* difference in day of year */
931 		a->tm_yday - b->tm_yday
932 		/* + intervening leap days */
933 		+  ((ay >> 2) - (by >> 2))
934 		-  (ay/100 - by/100)
935 		+  ((ay/100 >> 2) - (by/100 >> 2))
936 		/* + difference in years * 365 */
937 		+  (long)(ay-by) * 365
938 		);
939 	return (days * DAY + (a->tm_hour - b->tm_hour) * HOUR
940 	    + (a->tm_min - b->tm_min) * MINUTE
941 	    + (a->tm_sec - b->tm_sec));
942 }
943 
/*
 * Parses a Unix epoch timestamp (seconds).
 *
 * Accepts an optional leading '+' followed by decimal digits and
 * nothing else; anything different yields (time_t)-1.  This is a
 * subset of what GNU tar accepts (determined by black box testing)
 * but covers common use cases.
 */
static time_t
parse_unix_epoch(const char *p)
{
	const char *cursor = (*p == '+') ? p + 1 : p;
	time_t seconds;

	/* Need a number that runs to the end of the string. */
	if (!consume_unsigned_number(&cursor, &seconds) || *cursor != '\0')
		return (time_t)-1;

	return seconds;
}
969 
970 /*
971  *
972  * The public function.
973  *
974  * TODO: tokens[] array should be dynamically sized.
975  */
976 time_t
archive_parse_date(time_t now,const char * p)977 archive_parse_date(time_t now, const char *p)
978 {
979 	struct token	tokens[256];
980 	struct gdstate	_gds;
981 	struct token	*lasttoken;
982 	struct gdstate	*gds;
983 	struct tm	local, *tm;
984 	struct tm	gmt, *gmt_ptr;
985 	time_t		Start;
986 	time_t		tod;
987 	long		tzone;
988 
989 	/*
990 	 * @-prefixed Unix epoch timestamps (seconds)
991 	 * Skip the complex tokenizer - We do not want to accept strings like "@tenth"
992 	 */
993 	if (*p == '@')
994 		return parse_unix_epoch(p + 1);
995 
996 	/* Clear out the parsed token array. */
997 	memset(tokens, 0, sizeof(tokens));
998 	/* Initialize the parser state. */
999 	memset(&_gds, 0, sizeof(_gds));
1000 	gds = &_gds;
1001 
1002 	/* Look up the current time. */
1003 #if defined(HAVE_LOCALTIME_S)
1004 	tm = localtime_s(&local, &now) ? NULL : &local;
1005 #elif defined(HAVE_LOCALTIME_R)
1006 	tm = localtime_r(&now, &local);
1007 #else
1008 	memset(&local, 0, sizeof(local));
1009 	tm = localtime(&now);
1010 #endif
1011 	if (tm == NULL)
1012 		return -1;
1013 #if !defined(HAVE_LOCALTIME_R) && !defined(HAVE_LOCALTIME_S)
1014 	local = *tm;
1015 #endif
1016 
1017 	/* Look up UTC if we can and use that to determine the current
1018 	 * timezone offset. */
1019 #if defined(HAVE_GMTIME_S)
1020 	gmt_ptr = gmtime_s(&gmt, &now) ? NULL : &gmt;
1021 #elif defined(HAVE_GMTIME_R)
1022 	gmt_ptr = gmtime_r(&now, &gmt);
1023 #else
1024 	memset(&gmt, 0, sizeof(gmt));
1025 	gmt_ptr = gmtime(&now);
1026 	if (gmt_ptr != NULL) {
1027 		/* Copy, in case localtime and gmtime use the same buffer. */
1028 		gmt = *gmt_ptr;
1029 	}
1030 #endif
1031 	if (gmt_ptr != NULL)
1032 		tzone = difftm (&gmt, &local);
1033 	else
1034 		/* This system doesn't understand timezones; fake it. */
1035 		tzone = 0;
1036 	if(local.tm_isdst)
1037 		tzone += HOUR;
1038 
1039 	/* Tokenize the input string. */
1040 	lasttoken = tokens;
1041 	while ((lasttoken->token = nexttoken(&p, &lasttoken->value)) != 0) {
1042 		++lasttoken;
1043 		if (lasttoken > tokens + 255)
1044 			return -1;
1045 	}
1046 	gds->tokenp = tokens;
1047 
1048 	/* Match phrases until we run out of input tokens. */
1049 	while (gds->tokenp < lasttoken) {
1050 		if (!phrase(gds))
1051 			return -1;
1052 	}
1053 
1054 	/* Use current local timezone if none was specified. */
1055 	if (!gds->HaveZone) {
1056 		gds->Timezone = tzone;
1057 		gds->DSTmode = DSTmaybe;
1058 	}
1059 
1060 	/* If a timezone was specified, use that for generating the default
1061 	 * time components instead of the local timezone. */
1062 	if (gds->HaveZone && gmt_ptr != NULL) {
1063 		now -= gds->Timezone;
1064 #if defined(HAVE_GMTIME_S)
1065 		gmt_ptr = gmtime_s(&gmt, &now) ? NULL : &gmt;
1066 #elif defined(HAVE_GMTIME_R)
1067 		gmt_ptr = gmtime_r(&now, &gmt);
1068 #else
1069 		gmt_ptr = gmtime(&now);
1070 #endif
1071 		if (gmt_ptr != NULL)
1072 			local = *gmt_ptr;
1073 		now += gds->Timezone;
1074 	}
1075 
1076 	if (!gds->HaveYear)
1077 		gds->Year = local.tm_year + 1900;
1078 	if (!gds->HaveMonth)
1079 		gds->Month = local.tm_mon + 1;
1080 	if (!gds->HaveDay)
1081 		gds->Day = local.tm_mday;
1082 	/* Note: No default for hour/min/sec; a specifier that just
1083 	 * gives date always refers to 00:00 on that date. */
1084 
1085 	/* If we saw more than one time, timezone, weekday, year, month,
1086 	 * or day, then give up. */
1087 	if (gds->HaveTime > 1 || gds->HaveZone > 1 || gds->HaveWeekDay > 1
1088 	    || gds->HaveYear > 1 || gds->HaveMonth > 1 || gds->HaveDay > 1)
1089 		return -1;
1090 
1091 	/* Compute an absolute time based on whatever absolute information
1092 	 * we collected. */
1093 	if (gds->HaveYear || gds->HaveMonth || gds->HaveDay
1094 	    || gds->HaveTime || gds->HaveWeekDay) {
1095 		Start = Convert(gds->Month, gds->Day, gds->Year,
1096 		    gds->Hour, gds->Minutes, gds->Seconds,
1097 		    gds->Timezone, gds->DSTmode);
1098 		if (Start < 0)
1099 			return -1;
1100 	} else {
1101 		Start = now;
1102 		if (!gds->HaveRel)
1103 			Start -= local.tm_hour * HOUR + local.tm_min * MINUTE
1104 			    + local.tm_sec;
1105 	}
1106 
1107 	/* Add the relative offset. */
1108 	Start += gds->RelSeconds;
1109 	Start += RelativeMonth(Start, gds->Timezone, gds->RelMonth);
1110 
1111 	/* Adjust for day-of-week offsets. */
1112 	if (gds->HaveWeekDay
1113 	    && !(gds->HaveYear || gds->HaveMonth || gds->HaveDay)) {
1114 		tod = RelativeDate(Start, gds->Timezone,
1115 		    gds->DSTmode, gds->DayOrdinal, gds->DayNumber);
1116 		Start += tod;
1117 	}
1118 
1119 	/* -1 is an error indicator, so return 0 instead of -1 if
1120 	 * that's the actual time. */
1121 	return Start == -1 ? 0 : Start;
1122 }
1123 
1124 
#if	defined(TEST)

/*
 * Stand-alone test driver: parses each command-line argument with
 * archive_parse_date() relative to the current time and prints the
 * result.
 */
/* ARGSUSED */
int
main(int argc, char **argv)
{
    time_t	d;
    time_t	now = time(NULL);

    (void)argc;
    while (*++argv != NULL) {
	    (void)printf("Input: %s\n", *argv);
	    /* The parser defined in this file is archive_parse_date(). */
	    d = archive_parse_date(now, *argv);
	    if (d == -1)
		    (void)printf("Bad format - couldn't convert.\n");
	    else
		    (void)printf("Output: %s\n", ctime(&d));
    }
    exit(0);
    /* NOTREACHED */
}
#endif	/* defined(TEST) */
1146