xref: /freebsd/contrib/libarchive/libarchive/archive_getdate.c (revision d3d381b2b194b4d24853e92eecef55f262688d1a)
1 /*
2  * This code is in the public domain and has no copyright.
3  *
4  * This is a plain C recursive-descent translation of an old
5  * public-domain YACC grammar that has been used for parsing dates in
6  * very many open-source projects.
7  *
8  * Since the original authors were generous enough to donate their
9  * work to the public domain, I feel compelled to match their
10  * generosity.
11  *
12  * Tim Kientzle, February 2009.
13  */
14 
15 /*
16  * Header comment from original getdate.y:
17  */
18 
19 /*
20 **  Originally written by Steven M. Bellovin <smb@research.att.com> while
21 **  at the University of North Carolina at Chapel Hill.  Later tweaked by
22 **  a couple of people on Usenet.  Completely overhauled by Rich $alz
23 **  <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990;
24 **
25 **  This grammar has 10 shift/reduce conflicts.
26 **
27 **  This code is in the public domain and has no copyright.
28 */
29 
30 #ifdef __FreeBSD__
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 #endif
34 
35 #include <ctype.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <time.h>
40 
41 #define __LIBARCHIVE_BUILD 1
42 #include "archive_getdate.h"
43 
/*
 * Calendar constants.  EPOCH is the earliest year handled; the other
 * macros give the length of each unit in seconds, as long values.
 */
#define	EPOCH		1970
#define	MINUTE		(60L)
#define	HOUR		(60L * MINUTE)
#define	DAY		(24L * HOUR)

/* Whether daylight-saving time applies: yes, no, or still undecided. */
enum DSTMODE {
	DSTon,
	DSToff,
	DSTmaybe
};

/* Values carried by a tAMPM token. */
enum {
	tAM,
	tPM
};

/*
 * Token codes produced by nexttoken().  Every code is above the range
 * of a char, so the codes stay distinct from the literal characters
 * that nexttoken() hands back for punctuation.
 */
enum {
	tAGO = 260,
	tDAY,
	tDAYZONE,
	tAMPM,
	tMONTH,
	tMONTH_UNIT,
	tSEC_UNIT,
	tUNUMBER,
	tZONE,
	tDST
};

/* One lexed token: its code and the numeric value attached to it. */
struct token {
	int	token;
	time_t	value;
};
58 
59 /*
60  * Parser state.
61  */
62 struct gdstate {
63 	struct token *tokenp; /* Pointer to next token. */
64 	/* HaveXxxx counts how many of this kind of phrase we've seen;
65 	 * it's a fatal error to have more than one time, zone, day,
66 	 * or date phrase. */
67 	int	HaveYear;
68 	int	HaveMonth;
69 	int	HaveDay;
70 	int	HaveWeekDay; /* Day of week */
71 	int	HaveTime; /* Hour/minute/second */
72 	int	HaveZone; /* timezone and/or DST info */
73 	int	HaveRel; /* time offset; we can have more than one */
74 	/* Absolute time values. */
75 	time_t	Timezone;  /* Seconds offset from GMT */
76 	time_t	Day;
77 	time_t	Hour;
78 	time_t	Minutes;
79 	time_t	Month;
80 	time_t	Seconds;
81 	time_t	Year;
82 	/* DST selection */
83 	enum DSTMODE	DSTmode;
84 	/* Day of week accounting, e.g., "3rd Tuesday" */
85 	time_t	DayOrdinal; /* "3" in "3rd Tuesday" */
86 	time_t	DayNumber; /* "Tuesday" in "3rd Tuesday" */
87 	/* Relative time values: hour/day/week offsets are measured in
88 	 * seconds, month/year are counted in months. */
89 	time_t	RelMonth;
90 	time_t	RelSeconds;
91 };
92 
93 /*
94  * A series of functions that recognize certain common time phrases.
95  * Each function returns 1 if it managed to make sense of some of the
96  * tokens, zero otherwise.
97  */
98 
99 /*
100  *  hour:minute or hour:minute:second with optional AM, PM, or numeric
101  *  timezone offset
102  */
103 static int
104 timephrase(struct gdstate *gds)
105 {
106 	if (gds->tokenp[0].token == tUNUMBER
107 	    && gds->tokenp[1].token == ':'
108 	    && gds->tokenp[2].token == tUNUMBER
109 	    && gds->tokenp[3].token == ':'
110 	    && gds->tokenp[4].token == tUNUMBER) {
111 		/* "12:14:18" or "22:08:07" */
112 		++gds->HaveTime;
113 		gds->Hour = gds->tokenp[0].value;
114 		gds->Minutes = gds->tokenp[2].value;
115 		gds->Seconds = gds->tokenp[4].value;
116 		gds->tokenp += 5;
117 	}
118 	else if (gds->tokenp[0].token == tUNUMBER
119 	    && gds->tokenp[1].token == ':'
120 	    && gds->tokenp[2].token == tUNUMBER) {
121 		/* "12:14" or "22:08" */
122 		++gds->HaveTime;
123 		gds->Hour = gds->tokenp[0].value;
124 		gds->Minutes = gds->tokenp[2].value;
125 		gds->Seconds = 0;
126 		gds->tokenp += 3;
127 	}
128 	else if (gds->tokenp[0].token == tUNUMBER
129 	    && gds->tokenp[1].token == tAMPM) {
130 		/* "7" is a time if it's followed by "am" or "pm" */
131 		++gds->HaveTime;
132 		gds->Hour = gds->tokenp[0].value;
133 		gds->Minutes = gds->Seconds = 0;
134 		/* We'll handle the AM/PM below. */
135 		gds->tokenp += 1;
136 	} else {
137 		/* We can't handle this. */
138 		return 0;
139 	}
140 
141 	if (gds->tokenp[0].token == tAMPM) {
142 		/* "7:12pm", "12:20:13am" */
143 		if (gds->Hour == 12)
144 			gds->Hour = 0;
145 		if (gds->tokenp[0].value == tPM)
146 			gds->Hour += 12;
147 		gds->tokenp += 1;
148 	}
149 	if (gds->tokenp[0].token == '+'
150 	    && gds->tokenp[1].token == tUNUMBER) {
151 		/* "7:14+0700" */
152 		gds->HaveZone++;
153 		gds->DSTmode = DSToff;
154 		gds->Timezone = - ((gds->tokenp[1].value / 100) * HOUR
155 		    + (gds->tokenp[1].value % 100) * MINUTE);
156 		gds->tokenp += 2;
157 	}
158 	if (gds->tokenp[0].token == '-'
159 	    && gds->tokenp[1].token == tUNUMBER) {
160 		/* "19:14:12-0530" */
161 		gds->HaveZone++;
162 		gds->DSTmode = DSToff;
163 		gds->Timezone = + ((gds->tokenp[1].value / 100) * HOUR
164 		    + (gds->tokenp[1].value % 100) * MINUTE);
165 		gds->tokenp += 2;
166 	}
167 	return 1;
168 }
169 
170 /*
171  * Timezone name, possibly including DST.
172  */
173 static int
174 zonephrase(struct gdstate *gds)
175 {
176 	if (gds->tokenp[0].token == tZONE
177 	    && gds->tokenp[1].token == tDST) {
178 		gds->HaveZone++;
179 		gds->Timezone = gds->tokenp[0].value;
180 		gds->DSTmode = DSTon;
181 		gds->tokenp += 1;
182 		return 1;
183 	}
184 
185 	if (gds->tokenp[0].token == tZONE) {
186 		gds->HaveZone++;
187 		gds->Timezone = gds->tokenp[0].value;
188 		gds->DSTmode = DSToff;
189 		gds->tokenp += 1;
190 		return 1;
191 	}
192 
193 	if (gds->tokenp[0].token == tDAYZONE) {
194 		gds->HaveZone++;
195 		gds->Timezone = gds->tokenp[0].value;
196 		gds->DSTmode = DSTon;
197 		gds->tokenp += 1;
198 		return 1;
199 	}
200 	return 0;
201 }
202 
203 /*
204  * Year/month/day in various combinations.
205  */
206 static int
207 datephrase(struct gdstate *gds)
208 {
209 	if (gds->tokenp[0].token == tUNUMBER
210 	    && gds->tokenp[1].token == '/'
211 	    && gds->tokenp[2].token == tUNUMBER
212 	    && gds->tokenp[3].token == '/'
213 	    && gds->tokenp[4].token == tUNUMBER) {
214 		gds->HaveYear++;
215 		gds->HaveMonth++;
216 		gds->HaveDay++;
217 		if (gds->tokenp[0].value >= 13) {
218 			/* First number is big:  2004/01/29, 99/02/17 */
219 			gds->Year = gds->tokenp[0].value;
220 			gds->Month = gds->tokenp[2].value;
221 			gds->Day = gds->tokenp[4].value;
222 		} else if ((gds->tokenp[4].value >= 13)
223 		    || (gds->tokenp[2].value >= 13)) {
224 			/* Last number is big:  01/07/98 */
225 			/* Middle number is big:  01/29/04 */
226 			gds->Month = gds->tokenp[0].value;
227 			gds->Day = gds->tokenp[2].value;
228 			gds->Year = gds->tokenp[4].value;
229 		} else {
230 			/* No significant clues: 02/03/04 */
231 			gds->Month = gds->tokenp[0].value;
232 			gds->Day = gds->tokenp[2].value;
233 			gds->Year = gds->tokenp[4].value;
234 		}
235 		gds->tokenp += 5;
236 		return 1;
237 	}
238 
239 	if (gds->tokenp[0].token == tUNUMBER
240 	    && gds->tokenp[1].token == '/'
241 	    && gds->tokenp[2].token == tUNUMBER) {
242 		/* "1/15" */
243 		gds->HaveMonth++;
244 		gds->HaveDay++;
245 		gds->Month = gds->tokenp[0].value;
246 		gds->Day = gds->tokenp[2].value;
247 		gds->tokenp += 3;
248 		return 1;
249 	}
250 
251 	if (gds->tokenp[0].token == tUNUMBER
252 	    && gds->tokenp[1].token == '-'
253 	    && gds->tokenp[2].token == tUNUMBER
254 	    && gds->tokenp[3].token == '-'
255 	    && gds->tokenp[4].token == tUNUMBER) {
256 		/* ISO 8601 format.  yyyy-mm-dd.  */
257 		gds->HaveYear++;
258 		gds->HaveMonth++;
259 		gds->HaveDay++;
260 		gds->Year = gds->tokenp[0].value;
261 		gds->Month = gds->tokenp[2].value;
262 		gds->Day = gds->tokenp[4].value;
263 		gds->tokenp += 5;
264 		return 1;
265 	}
266 
267 	if (gds->tokenp[0].token == tUNUMBER
268 	    && gds->tokenp[1].token == '-'
269 	    && gds->tokenp[2].token == tMONTH
270 	    && gds->tokenp[3].token == '-'
271 	    && gds->tokenp[4].token == tUNUMBER) {
272 		gds->HaveYear++;
273 		gds->HaveMonth++;
274 		gds->HaveDay++;
275 		if (gds->tokenp[0].value > 31) {
276 			/* e.g. 1992-Jun-17 */
277 			gds->Year = gds->tokenp[0].value;
278 			gds->Month = gds->tokenp[2].value;
279 			gds->Day = gds->tokenp[4].value;
280 		} else {
281 			/* e.g. 17-JUN-1992.  */
282 			gds->Day = gds->tokenp[0].value;
283 			gds->Month = gds->tokenp[2].value;
284 			gds->Year = gds->tokenp[4].value;
285 		}
286 		gds->tokenp += 5;
287 		return 1;
288 	}
289 
290 	if (gds->tokenp[0].token == tMONTH
291 	    && gds->tokenp[1].token == tUNUMBER
292 	    && gds->tokenp[2].token == ','
293 	    && gds->tokenp[3].token == tUNUMBER) {
294 		/* "June 17, 2001" */
295 		gds->HaveYear++;
296 		gds->HaveMonth++;
297 		gds->HaveDay++;
298 		gds->Month = gds->tokenp[0].value;
299 		gds->Day = gds->tokenp[1].value;
300 		gds->Year = gds->tokenp[3].value;
301 		gds->tokenp += 4;
302 		return 1;
303 	}
304 
305 	if (gds->tokenp[0].token == tMONTH
306 	    && gds->tokenp[1].token == tUNUMBER) {
307 		/* "May 3" */
308 		gds->HaveMonth++;
309 		gds->HaveDay++;
310 		gds->Month = gds->tokenp[0].value;
311 		gds->Day = gds->tokenp[1].value;
312 		gds->tokenp += 2;
313 		return 1;
314 	}
315 
316 	if (gds->tokenp[0].token == tUNUMBER
317 	    && gds->tokenp[1].token == tMONTH
318 	    && gds->tokenp[2].token == tUNUMBER) {
319 		/* "12 Sept 1997" */
320 		gds->HaveYear++;
321 		gds->HaveMonth++;
322 		gds->HaveDay++;
323 		gds->Day = gds->tokenp[0].value;
324 		gds->Month = gds->tokenp[1].value;
325 		gds->Year = gds->tokenp[2].value;
326 		gds->tokenp += 3;
327 		return 1;
328 	}
329 
330 	if (gds->tokenp[0].token == tUNUMBER
331 	    && gds->tokenp[1].token == tMONTH) {
332 		/* "12 Sept" */
333 		gds->HaveMonth++;
334 		gds->HaveDay++;
335 		gds->Day = gds->tokenp[0].value;
336 		gds->Month = gds->tokenp[1].value;
337 		gds->tokenp += 2;
338 		return 1;
339 	}
340 
341 	return 0;
342 }
343 
344 /*
345  * Relative time phrase: "tomorrow", "yesterday", "+1 hour", etc.
346  */
347 static int
348 relunitphrase(struct gdstate *gds)
349 {
350 	if (gds->tokenp[0].token == '-'
351 	    && gds->tokenp[1].token == tUNUMBER
352 	    && gds->tokenp[2].token == tSEC_UNIT) {
353 		/* "-3 hours" */
354 		gds->HaveRel++;
355 		gds->RelSeconds -= gds->tokenp[1].value * gds->tokenp[2].value;
356 		gds->tokenp += 3;
357 		return 1;
358 	}
359 	if (gds->tokenp[0].token == '+'
360 	    && gds->tokenp[1].token == tUNUMBER
361 	    && gds->tokenp[2].token == tSEC_UNIT) {
362 		/* "+1 minute" */
363 		gds->HaveRel++;
364 		gds->RelSeconds += gds->tokenp[1].value * gds->tokenp[2].value;
365 		gds->tokenp += 3;
366 		return 1;
367 	}
368 	if (gds->tokenp[0].token == tUNUMBER
369 	    && gds->tokenp[1].token == tSEC_UNIT) {
370 		/* "1 day" */
371 		gds->HaveRel++;
372 		gds->RelSeconds += gds->tokenp[0].value * gds->tokenp[1].value;
373 		gds->tokenp += 2;
374 		return 1;
375 	}
376 	if (gds->tokenp[0].token == '-'
377 	    && gds->tokenp[1].token == tUNUMBER
378 	    && gds->tokenp[2].token == tMONTH_UNIT) {
379 		/* "-3 months" */
380 		gds->HaveRel++;
381 		gds->RelMonth -= gds->tokenp[1].value * gds->tokenp[2].value;
382 		gds->tokenp += 3;
383 		return 1;
384 	}
385 	if (gds->tokenp[0].token == '+'
386 	    && gds->tokenp[1].token == tUNUMBER
387 	    && gds->tokenp[2].token == tMONTH_UNIT) {
388 		/* "+5 years" */
389 		gds->HaveRel++;
390 		gds->RelMonth += gds->tokenp[1].value * gds->tokenp[2].value;
391 		gds->tokenp += 3;
392 		return 1;
393 	}
394 	if (gds->tokenp[0].token == tUNUMBER
395 	    && gds->tokenp[1].token == tMONTH_UNIT) {
396 		/* "2 years" */
397 		gds->HaveRel++;
398 		gds->RelMonth += gds->tokenp[0].value * gds->tokenp[1].value;
399 		gds->tokenp += 2;
400 		return 1;
401 	}
402 	if (gds->tokenp[0].token == tSEC_UNIT) {
403 		/* "now", "tomorrow" */
404 		gds->HaveRel++;
405 		gds->RelSeconds += gds->tokenp[0].value;
406 		gds->tokenp += 1;
407 		return 1;
408 	}
409 	if (gds->tokenp[0].token == tMONTH_UNIT) {
410 		/* "month" */
411 		gds->HaveRel++;
412 		gds->RelMonth += gds->tokenp[0].value;
413 		gds->tokenp += 1;
414 		return 1;
415 	}
416 	return 0;
417 }
418 
419 /*
420  * Day of the week specification.
421  */
422 static int
423 dayphrase(struct gdstate *gds)
424 {
425 	if (gds->tokenp[0].token == tDAY) {
426 		/* "tues", "wednesday," */
427 		gds->HaveWeekDay++;
428 		gds->DayOrdinal = 1;
429 		gds->DayNumber = gds->tokenp[0].value;
430 		gds->tokenp += 1;
431 		if (gds->tokenp[0].token == ',')
432 			gds->tokenp += 1;
433 		return 1;
434 	}
435 	if (gds->tokenp[0].token == tUNUMBER
436 		&& gds->tokenp[1].token == tDAY) {
437 		/* "second tues" "3 wed" */
438 		gds->HaveWeekDay++;
439 		gds->DayOrdinal = gds->tokenp[0].value;
440 		gds->DayNumber = gds->tokenp[1].value;
441 		gds->tokenp += 2;
442 		return 1;
443 	}
444 	return 0;
445 }
446 
447 /*
448  * Try to match a phrase using one of the above functions.
449  * This layer also deals with a couple of generic issues.
450  */
451 static int
452 phrase(struct gdstate *gds)
453 {
454 	if (timephrase(gds))
455 		return 1;
456 	if (zonephrase(gds))
457 		return 1;
458 	if (datephrase(gds))
459 		return 1;
460 	if (dayphrase(gds))
461 		return 1;
462 	if (relunitphrase(gds)) {
463 		if (gds->tokenp[0].token == tAGO) {
464 			gds->RelSeconds = -gds->RelSeconds;
465 			gds->RelMonth = -gds->RelMonth;
466 			gds->tokenp += 1;
467 		}
468 		return 1;
469 	}
470 
471 	/* Bare numbers sometimes have meaning. */
472 	if (gds->tokenp[0].token == tUNUMBER) {
473 		if (gds->HaveTime && !gds->HaveYear && !gds->HaveRel) {
474 			gds->HaveYear++;
475 			gds->Year = gds->tokenp[0].value;
476 			gds->tokenp += 1;
477 			return 1;
478 		}
479 
480 		if(gds->tokenp[0].value > 10000) {
481 			/* "20040301" */
482 			gds->HaveYear++;
483 			gds->HaveMonth++;
484 			gds->HaveDay++;
485 			gds->Day= (gds->tokenp[0].value)%100;
486 			gds->Month= (gds->tokenp[0].value/100)%100;
487 			gds->Year = gds->tokenp[0].value/10000;
488 			gds->tokenp += 1;
489 			return 1;
490 		}
491 
492 		if (gds->tokenp[0].value < 24) {
493 			gds->HaveTime++;
494 			gds->Hour = gds->tokenp[0].value;
495 			gds->Minutes = 0;
496 			gds->Seconds = 0;
497 			gds->tokenp += 1;
498 			return 1;
499 		}
500 
501 		if ((gds->tokenp[0].value / 100 < 24)
502 		    && (gds->tokenp[0].value % 100 < 60)) {
503 			/* "513" is same as "5:13" */
504 			gds->Hour = gds->tokenp[0].value / 100;
505 			gds->Minutes = gds->tokenp[0].value % 100;
506 			gds->Seconds = 0;
507 			gds->tokenp += 1;
508 			return 1;
509 		}
510 	}
511 
512 	return 0;
513 }
514 
515 /*
516  * A dictionary of time words.
517  */
518 static struct LEXICON {
519 	size_t		abbrev;
520 	const char	*name;
521 	int		type;
522 	time_t		value;
523 } const TimeWords[] = {
524 	/* am/pm */
525 	{ 0, "am",		tAMPM,	tAM },
526 	{ 0, "pm",		tAMPM,	tPM },
527 
528 	/* Month names. */
529 	{ 3, "january",		tMONTH,  1 },
530 	{ 3, "february",	tMONTH,  2 },
531 	{ 3, "march",		tMONTH,  3 },
532 	{ 3, "april",		tMONTH,  4 },
533 	{ 3, "may",		tMONTH,  5 },
534 	{ 3, "june",		tMONTH,  6 },
535 	{ 3, "july",		tMONTH,  7 },
536 	{ 3, "august",		tMONTH,  8 },
537 	{ 3, "september",	tMONTH,  9 },
538 	{ 3, "october",		tMONTH, 10 },
539 	{ 3, "november",	tMONTH, 11 },
540 	{ 3, "december",	tMONTH, 12 },
541 
542 	/* Days of the week. */
543 	{ 2, "sunday",		tDAY, 0 },
544 	{ 3, "monday",		tDAY, 1 },
545 	{ 2, "tuesday",		tDAY, 2 },
546 	{ 3, "wednesday",	tDAY, 3 },
547 	{ 2, "thursday",	tDAY, 4 },
548 	{ 2, "friday",		tDAY, 5 },
549 	{ 2, "saturday",	tDAY, 6 },
550 
551 	/* Timezones: Offsets are in seconds. */
552 	{ 0, "gmt",  tZONE,     0*HOUR }, /* Greenwich Mean */
553 	{ 0, "ut",   tZONE,     0*HOUR }, /* Universal (Coordinated) */
554 	{ 0, "utc",  tZONE,     0*HOUR },
555 	{ 0, "wet",  tZONE,     0*HOUR }, /* Western European */
556 	{ 0, "bst",  tDAYZONE,  0*HOUR }, /* British Summer */
557 	{ 0, "wat",  tZONE,     1*HOUR }, /* West Africa */
558 	{ 0, "at",   tZONE,     2*HOUR }, /* Azores */
559 	/* { 0, "bst", tZONE, 3*HOUR }, */ /* Brazil Standard: Conflict */
560 	/* { 0, "gst", tZONE, 3*HOUR }, */ /* Greenland Standard: Conflict*/
561 	{ 0, "nft",  tZONE,     3*HOUR+30*MINUTE }, /* Newfoundland */
562 	{ 0, "nst",  tZONE,     3*HOUR+30*MINUTE }, /* Newfoundland Standard */
563 	{ 0, "ndt",  tDAYZONE,  3*HOUR+30*MINUTE }, /* Newfoundland Daylight */
564 	{ 0, "ast",  tZONE,     4*HOUR }, /* Atlantic Standard */
565 	{ 0, "adt",  tDAYZONE,  4*HOUR }, /* Atlantic Daylight */
566 	{ 0, "est",  tZONE,     5*HOUR }, /* Eastern Standard */
567 	{ 0, "edt",  tDAYZONE,  5*HOUR }, /* Eastern Daylight */
568 	{ 0, "cst",  tZONE,     6*HOUR }, /* Central Standard */
569 	{ 0, "cdt",  tDAYZONE,  6*HOUR }, /* Central Daylight */
570 	{ 0, "mst",  tZONE,     7*HOUR }, /* Mountain Standard */
571 	{ 0, "mdt",  tDAYZONE,  7*HOUR }, /* Mountain Daylight */
572 	{ 0, "pst",  tZONE,     8*HOUR }, /* Pacific Standard */
573 	{ 0, "pdt",  tDAYZONE,  8*HOUR }, /* Pacific Daylight */
574 	{ 0, "yst",  tZONE,     9*HOUR }, /* Yukon Standard */
575 	{ 0, "ydt",  tDAYZONE,  9*HOUR }, /* Yukon Daylight */
576 	{ 0, "hst",  tZONE,     10*HOUR }, /* Hawaii Standard */
577 	{ 0, "hdt",  tDAYZONE,  10*HOUR }, /* Hawaii Daylight */
578 	{ 0, "cat",  tZONE,     10*HOUR }, /* Central Alaska */
579 	{ 0, "ahst", tZONE,     10*HOUR }, /* Alaska-Hawaii Standard */
580 	{ 0, "nt",   tZONE,     11*HOUR }, /* Nome */
581 	{ 0, "idlw", tZONE,     12*HOUR }, /* Intl Date Line West */
582 	{ 0, "cet",  tZONE,     -1*HOUR }, /* Central European */
583 	{ 0, "met",  tZONE,     -1*HOUR }, /* Middle European */
584 	{ 0, "mewt", tZONE,     -1*HOUR }, /* Middle European Winter */
585 	{ 0, "mest", tDAYZONE,  -1*HOUR }, /* Middle European Summer */
586 	{ 0, "swt",  tZONE,     -1*HOUR }, /* Swedish Winter */
587 	{ 0, "sst",  tDAYZONE,  -1*HOUR }, /* Swedish Summer */
588 	{ 0, "fwt",  tZONE,     -1*HOUR }, /* French Winter */
589 	{ 0, "fst",  tDAYZONE,  -1*HOUR }, /* French Summer */
590 	{ 0, "eet",  tZONE,     -2*HOUR }, /* Eastern Eur, USSR Zone 1 */
591 	{ 0, "bt",   tZONE,     -3*HOUR }, /* Baghdad, USSR Zone 2 */
592 	{ 0, "it",   tZONE,     -3*HOUR-30*MINUTE },/* Iran */
593 	{ 0, "zp4",  tZONE,     -4*HOUR }, /* USSR Zone 3 */
594 	{ 0, "zp5",  tZONE,     -5*HOUR }, /* USSR Zone 4 */
595 	{ 0, "ist",  tZONE,     -5*HOUR-30*MINUTE },/* Indian Standard */
596 	{ 0, "zp6",  tZONE,     -6*HOUR }, /* USSR Zone 5 */
597 	/* { 0, "nst",  tZONE, -6.5*HOUR }, */ /* North Sumatra: Conflict */
598 	/* { 0, "sst", tZONE, -7*HOUR }, */ /* So Sumatra, USSR 6: Conflict */
599 	{ 0, "wast", tZONE,     -7*HOUR }, /* West Australian Standard */
600 	{ 0, "wadt", tDAYZONE,  -7*HOUR }, /* West Australian Daylight */
601 	{ 0, "jt",   tZONE,     -7*HOUR-30*MINUTE },/* Java (3pm in Cronusland!)*/
602 	{ 0, "cct",  tZONE,     -8*HOUR }, /* China Coast, USSR Zone 7 */
603 	{ 0, "jst",  tZONE,     -9*HOUR }, /* Japan Std, USSR Zone 8 */
604 	{ 0, "cast", tZONE,     -9*HOUR-30*MINUTE },/* Ctrl Australian Std */
605 	{ 0, "cadt", tDAYZONE,  -9*HOUR-30*MINUTE },/* Ctrl Australian Daylt */
606 	{ 0, "east", tZONE,     -10*HOUR }, /* Eastern Australian Std */
607 	{ 0, "eadt", tDAYZONE,  -10*HOUR }, /* Eastern Australian Daylt */
608 	{ 0, "gst",  tZONE,     -10*HOUR }, /* Guam Std, USSR Zone 9 */
609 	{ 0, "nzt",  tZONE,     -12*HOUR }, /* New Zealand */
610 	{ 0, "nzst", tZONE,     -12*HOUR }, /* New Zealand Standard */
611 	{ 0, "nzdt", tDAYZONE,  -12*HOUR }, /* New Zealand Daylight */
612 	{ 0, "idle", tZONE,     -12*HOUR }, /* Intl Date Line East */
613 
614 	{ 0, "dst",  tDST,		0 },
615 
616 	/* Time units. */
617 	{ 4, "years",		tMONTH_UNIT,	12 },
618 	{ 5, "months",		tMONTH_UNIT,	1 },
619 	{ 9, "fortnights",	tSEC_UNIT,	14 * DAY },
620 	{ 4, "weeks",		tSEC_UNIT,	7 * DAY },
621 	{ 3, "days",		tSEC_UNIT,	DAY },
622 	{ 4, "hours",		tSEC_UNIT,	HOUR },
623 	{ 3, "minutes",		tSEC_UNIT,	MINUTE },
624 	{ 3, "seconds",		tSEC_UNIT,	1 },
625 
626 	/* Relative-time words. */
627 	{ 0, "tomorrow",	tSEC_UNIT,	DAY },
628 	{ 0, "yesterday",	tSEC_UNIT,	-DAY },
629 	{ 0, "today",		tSEC_UNIT,	0 },
630 	{ 0, "now",		tSEC_UNIT,	0 },
631 	{ 0, "last",		tUNUMBER,	-1 },
632 	{ 0, "this",		tSEC_UNIT,	0 },
633 	{ 0, "next",		tUNUMBER,	2 },
634 	{ 0, "first",		tUNUMBER,	1 },
635 	{ 0, "1st",		tUNUMBER,	1 },
636 /*	{ 0, "second",		tUNUMBER,	2 }, */
637 	{ 0, "2nd",		tUNUMBER,	2 },
638 	{ 0, "third",		tUNUMBER,	3 },
639 	{ 0, "3rd",		tUNUMBER,	3 },
640 	{ 0, "fourth",		tUNUMBER,	4 },
641 	{ 0, "4th",		tUNUMBER,	4 },
642 	{ 0, "fifth",		tUNUMBER,	5 },
643 	{ 0, "5th",		tUNUMBER,	5 },
644 	{ 0, "sixth",		tUNUMBER,	6 },
645 	{ 0, "seventh",		tUNUMBER,	7 },
646 	{ 0, "eighth",		tUNUMBER,	8 },
647 	{ 0, "ninth",		tUNUMBER,	9 },
648 	{ 0, "tenth",		tUNUMBER,	10 },
649 	{ 0, "eleventh",	tUNUMBER,	11 },
650 	{ 0, "twelfth",		tUNUMBER,	12 },
651 	{ 0, "ago",		tAGO,		1 },
652 
653 	/* Military timezones. */
654 	{ 0, "a",	tZONE,	1*HOUR },
655 	{ 0, "b",	tZONE,	2*HOUR },
656 	{ 0, "c",	tZONE,	3*HOUR },
657 	{ 0, "d",	tZONE,	4*HOUR },
658 	{ 0, "e",	tZONE,	5*HOUR },
659 	{ 0, "f",	tZONE,	6*HOUR },
660 	{ 0, "g",	tZONE,	7*HOUR },
661 	{ 0, "h",	tZONE,	8*HOUR },
662 	{ 0, "i",	tZONE,	9*HOUR },
663 	{ 0, "k",	tZONE,	10*HOUR },
664 	{ 0, "l",	tZONE,	11*HOUR },
665 	{ 0, "m",	tZONE,	12*HOUR },
666 	{ 0, "n",	tZONE,	-1*HOUR },
667 	{ 0, "o",	tZONE,	-2*HOUR },
668 	{ 0, "p",	tZONE,	-3*HOUR },
669 	{ 0, "q",	tZONE,	-4*HOUR },
670 	{ 0, "r",	tZONE,	-5*HOUR },
671 	{ 0, "s",	tZONE,	-6*HOUR },
672 	{ 0, "t",	tZONE,	-7*HOUR },
673 	{ 0, "u",	tZONE,	-8*HOUR },
674 	{ 0, "v",	tZONE,	-9*HOUR },
675 	{ 0, "w",	tZONE,	-10*HOUR },
676 	{ 0, "x",	tZONE,	-11*HOUR },
677 	{ 0, "y",	tZONE,	-12*HOUR },
678 	{ 0, "z",	tZONE,	0*HOUR },
679 
680 	/* End of table. */
681 	{ 0, NULL,	0,	0 }
682 };
683 
684 /*
685  * Year is either:
686  *  = A number from 0 to 99, which means a year from 1970 to 2069, or
687  *  = The actual year (>=100).
688  */
689 static time_t
690 Convert(time_t Month, time_t Day, time_t Year,
691 	time_t Hours, time_t Minutes, time_t Seconds,
692 	time_t Timezone, enum DSTMODE DSTmode)
693 {
694 	signed char DaysInMonth[12] = {
695 		31, 0, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31
696 	};
697 	time_t	Julian;
698 	int	i;
699 
700 	if (Year < 69)
701 		Year += 2000;
702 	else if (Year < 100)
703 		Year += 1900;
704 	DaysInMonth[1] = Year % 4 == 0 && (Year % 100 != 0 || Year % 400 == 0)
705 	    ? 29 : 28;
706 	/* Checking for 2038 bogusly assumes that time_t is 32 bits.  But
707 	   I'm too lazy to try to check for time_t overflow in another way.  */
708 	if (Year < EPOCH || Year > 2038
709 	    || Month < 1 || Month > 12
710 	    /* Lint fluff:  "conversion from long may lose accuracy" */
711 	    || Day < 1 || Day > DaysInMonth[(int)--Month]
712 	    || Hours < 0 || Hours > 23
713 	    || Minutes < 0 || Minutes > 59
714 	    || Seconds < 0 || Seconds > 59)
715 		return -1;
716 
717 	Julian = Day - 1;
718 	for (i = 0; i < Month; i++)
719 		Julian += DaysInMonth[i];
720 	for (i = EPOCH; i < Year; i++)
721 		Julian += 365 + (i % 4 == 0);
722 	Julian *= DAY;
723 	Julian += Timezone;
724 	Julian += Hours * HOUR + Minutes * MINUTE + Seconds;
725 	if (DSTmode == DSTon
726 	    || (DSTmode == DSTmaybe && localtime(&Julian)->tm_isdst))
727 		Julian -= HOUR;
728 	return Julian;
729 }
730 
731 
732 static time_t
733 DSTcorrect(time_t Start, time_t Future)
734 {
735 	time_t	StartDay;
736 	time_t	FutureDay;
737 
738 	StartDay = (localtime(&Start)->tm_hour + 1) % 24;
739 	FutureDay = (localtime(&Future)->tm_hour + 1) % 24;
740 	return (Future - Start) + (StartDay - FutureDay) * HOUR;
741 }
742 
743 
744 static time_t
745 RelativeDate(time_t Start, time_t zone, int dstmode,
746     time_t DayOrdinal, time_t DayNumber)
747 {
748 	struct tm	*tm;
749 	time_t	t, now;
750 
751 	t = Start - zone;
752 	tm = gmtime(&t);
753 	now = Start;
754 	now += DAY * ((DayNumber - tm->tm_wday + 7) % 7);
755 	now += 7 * DAY * (DayOrdinal <= 0 ? DayOrdinal : DayOrdinal - 1);
756 	if (dstmode == DSTmaybe)
757 		return DSTcorrect(Start, now);
758 	return now - Start;
759 }
760 
761 
762 static time_t
763 RelativeMonth(time_t Start, time_t Timezone, time_t RelMonth)
764 {
765 	struct tm	*tm;
766 	time_t	Month;
767 	time_t	Year;
768 
769 	if (RelMonth == 0)
770 		return 0;
771 	tm = localtime(&Start);
772 	Month = 12 * (tm->tm_year + 1900) + tm->tm_mon + RelMonth;
773 	Year = Month / 12;
774 	Month = Month % 12 + 1;
775 	return DSTcorrect(Start,
776 	    Convert(Month, (time_t)tm->tm_mday, Year,
777 		(time_t)tm->tm_hour, (time_t)tm->tm_min, (time_t)tm->tm_sec,
778 		Timezone, DSTmaybe));
779 }
780 
781 /*
782  * Tokenizer.
783  */
784 static int
785 nexttoken(const char **in, time_t *value)
786 {
787 	char	c;
788 	char	buff[64];
789 
790 	for ( ; ; ) {
791 		while (isspace((unsigned char)**in))
792 			++*in;
793 
794 		/* Skip parenthesized comments. */
795 		if (**in == '(') {
796 			int Count = 0;
797 			do {
798 				c = *(*in)++;
799 				if (c == '\0')
800 					return c;
801 				if (c == '(')
802 					Count++;
803 				else if (c == ')')
804 					Count--;
805 			} while (Count > 0);
806 			continue;
807 		}
808 
809 		/* Try the next token in the word table first. */
810 		/* This allows us to match "2nd", for example. */
811 		{
812 			const char *src = *in;
813 			const struct LEXICON *tp;
814 			unsigned i = 0;
815 
816 			/* Force to lowercase and strip '.' characters. */
817 			while (*src != '\0'
818 			    && (isalnum((unsigned char)*src) || *src == '.')
819 			    && i < sizeof(buff)-1) {
820 				if (*src != '.') {
821 					if (isupper((unsigned char)*src))
822 						buff[i++] = tolower((unsigned char)*src);
823 					else
824 						buff[i++] = *src;
825 				}
826 				src++;
827 			}
828 			buff[i] = '\0';
829 
830 			/*
831 			 * Find the first match.  If the word can be
832 			 * abbreviated, make sure we match at least
833 			 * the minimum abbreviation.
834 			 */
835 			for (tp = TimeWords; tp->name; tp++) {
836 				size_t abbrev = tp->abbrev;
837 				if (abbrev == 0)
838 					abbrev = strlen(tp->name);
839 				if (strlen(buff) >= abbrev
840 				    && strncmp(tp->name, buff, strlen(buff))
841 				    	== 0) {
842 					/* Skip over token. */
843 					*in = src;
844 					/* Return the match. */
845 					*value = tp->value;
846 					return tp->type;
847 				}
848 			}
849 		}
850 
851 		/*
852 		 * Not in the word table, maybe it's a number.  Note:
853 		 * Because '-' and '+' have other special meanings, I
854 		 * don't deal with signed numbers here.
855 		 */
856 		if (isdigit((unsigned char)(c = **in))) {
857 			for (*value = 0; isdigit((unsigned char)(c = *(*in)++)); )
858 				*value = 10 * *value + c - '0';
859 			(*in)--;
860 			return (tUNUMBER);
861 		}
862 
863 		return *(*in)++;
864 	}
865 }
866 
867 #define	TM_YEAR_ORIGIN 1900
868 
869 /* Yield A - B, measured in seconds.  */
870 static long
871 difftm (struct tm *a, struct tm *b)
872 {
873 	int ay = a->tm_year + (TM_YEAR_ORIGIN - 1);
874 	int by = b->tm_year + (TM_YEAR_ORIGIN - 1);
875 	int days = (
876 		/* difference in day of year */
877 		a->tm_yday - b->tm_yday
878 		/* + intervening leap days */
879 		+  ((ay >> 2) - (by >> 2))
880 		-  (ay/100 - by/100)
881 		+  ((ay/100 >> 2) - (by/100 >> 2))
882 		/* + difference in years * 365 */
883 		+  (long)(ay-by) * 365
884 		);
885 	return (days * DAY + (a->tm_hour - b->tm_hour) * HOUR
886 	    + (a->tm_min - b->tm_min) * MINUTE
887 	    + (a->tm_sec - b->tm_sec));
888 }
889 
890 /*
891  *
892  * The public function.
893  *
894  * TODO: tokens[] array should be dynamically sized.
895  */
896 time_t
897 __archive_get_date(time_t now, const char *p)
898 {
899 	struct token	tokens[256];
900 	struct gdstate	_gds;
901 	struct token	*lasttoken;
902 	struct gdstate	*gds;
903 	struct tm	local, *tm;
904 	struct tm	gmt, *gmt_ptr;
905 	time_t		Start;
906 	time_t		tod;
907 	long		tzone;
908 
909 	/* Clear out the parsed token array. */
910 	memset(tokens, 0, sizeof(tokens));
911 	/* Initialize the parser state. */
912 	memset(&_gds, 0, sizeof(_gds));
913 	gds = &_gds;
914 
915 	/* Look up the current time. */
916 	memset(&local, 0, sizeof(local));
917 	tm = localtime (&now);
918 	if (tm == NULL)
919 		return -1;
920 	local = *tm;
921 
922 	/* Look up UTC if we can and use that to determine the current
923 	 * timezone offset. */
924 	memset(&gmt, 0, sizeof(gmt));
925 	gmt_ptr = gmtime (&now);
926 	if (gmt_ptr != NULL) {
927 		/* Copy, in case localtime and gmtime use the same buffer. */
928 		gmt = *gmt_ptr;
929 	}
930 	if (gmt_ptr != NULL)
931 		tzone = difftm (&gmt, &local);
932 	else
933 		/* This system doesn't understand timezones; fake it. */
934 		tzone = 0;
935 	if(local.tm_isdst)
936 		tzone += HOUR;
937 
938 	/* Tokenize the input string. */
939 	lasttoken = tokens;
940 	while ((lasttoken->token = nexttoken(&p, &lasttoken->value)) != 0) {
941 		++lasttoken;
942 		if (lasttoken > tokens + 255)
943 			return -1;
944 	}
945 	gds->tokenp = tokens;
946 
947 	/* Match phrases until we run out of input tokens. */
948 	while (gds->tokenp < lasttoken) {
949 		if (!phrase(gds))
950 			return -1;
951 	}
952 
953 	/* Use current local timezone if none was specified. */
954 	if (!gds->HaveZone) {
955 		gds->Timezone = tzone;
956 		gds->DSTmode = DSTmaybe;
957 	}
958 
959 	/* If a timezone was specified, use that for generating the default
960 	 * time components instead of the local timezone. */
961 	if (gds->HaveZone && gmt_ptr != NULL) {
962 		now -= gds->Timezone;
963 		gmt_ptr = gmtime (&now);
964 		if (gmt_ptr != NULL)
965 			local = *gmt_ptr;
966 		now += gds->Timezone;
967 	}
968 
969 	if (!gds->HaveYear)
970 		gds->Year = local.tm_year + 1900;
971 	if (!gds->HaveMonth)
972 		gds->Month = local.tm_mon + 1;
973 	if (!gds->HaveDay)
974 		gds->Day = local.tm_mday;
975 	/* Note: No default for hour/min/sec; a specifier that just
976 	 * gives date always refers to 00:00 on that date. */
977 
978 	/* If we saw more than one time, timezone, weekday, year, month,
979 	 * or day, then give up. */
980 	if (gds->HaveTime > 1 || gds->HaveZone > 1 || gds->HaveWeekDay > 1
981 	    || gds->HaveYear > 1 || gds->HaveMonth > 1 || gds->HaveDay > 1)
982 		return -1;
983 
984 	/* Compute an absolute time based on whatever absolute information
985 	 * we collected. */
986 	if (gds->HaveYear || gds->HaveMonth || gds->HaveDay
987 	    || gds->HaveTime || gds->HaveWeekDay) {
988 		Start = Convert(gds->Month, gds->Day, gds->Year,
989 		    gds->Hour, gds->Minutes, gds->Seconds,
990 		    gds->Timezone, gds->DSTmode);
991 		if (Start < 0)
992 			return -1;
993 	} else {
994 		Start = now;
995 		if (!gds->HaveRel)
996 			Start -= local.tm_hour * HOUR + local.tm_min * MINUTE
997 			    + local.tm_sec;
998 	}
999 
1000 	/* Add the relative offset. */
1001 	Start += gds->RelSeconds;
1002 	Start += RelativeMonth(Start, gds->Timezone, gds->RelMonth);
1003 
1004 	/* Adjust for day-of-week offsets. */
1005 	if (gds->HaveWeekDay
1006 	    && !(gds->HaveYear || gds->HaveMonth || gds->HaveDay)) {
1007 		tod = RelativeDate(Start, gds->Timezone,
1008 		    gds->DSTmode, gds->DayOrdinal, gds->DayNumber);
1009 		Start += tod;
1010 	}
1011 
1012 	/* -1 is an error indicator, so return 0 instead of -1 if
1013 	 * that's the actual time. */
1014 	return Start == -1 ? 0 : Start;
1015 }
1016 
1017 
#if	defined(TEST)

/*
 * Stand-alone test driver, built only when TEST is defined.  Each
 * command-line argument is parsed relative to the current time and
 * the outcome is printed.
 */
/* ARGSUSED */
int
main(int argc, char **argv)
{
    time_t	d;
    time_t	now = time(NULL);

    (void)argc;
    while (*++argv != NULL) {
	    (void)printf("Input: %s\n", *argv);
	    /* The parser in this file is named __archive_get_date();
	     * no get_date() is defined here. */
	    d = __archive_get_date(now, *argv);
	    if (d == -1)
		    (void)printf("Bad format - couldn't convert.\n");
	    else
		    (void)printf("Output: %s\n", ctime(&d));
    }
    exit(0);
    /* NOTREACHED */
}
#endif	/* defined(TEST) */
1039