xref: /freebsd/contrib/tzdata/ziguard.awk (revision 13227efc5b034bfa31c90a3b75f9d006c77c6c90)
1# Convert tzdata source into vanguard or rearguard form.
2
3# Contributed by Paul Eggert.  This file is in the public domain.
4
5# This is not a general-purpose converter; it is designed for current tzdata.
6# It just converts from current source to main, vanguard, and rearguard forms.
7# Although it might be nice for it to be idempotent, or to be useful
8# for converting back and forth between formats,
9# it does not do these nonessential tasks now.
10#
11# This script can convert from main to vanguard form and vice versa.
12# There is no need to convert rearguard to other forms.
13#
14# When converting to vanguard form, the output can use the line
15# "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
16#
17# When converting to vanguard form, the output can use negative SAVE
18# values.
19#
20# When converting to rearguard form, the output uses only nonnegative
21# SAVE values.  The idea is for the output data to simulate the behavior
22# of the input data as best it can within the constraints of the
23# rearguard format.
24
# Convert FIELD, an [-]h[:mm] string such as "-0:30", into a signed
# count of minutes such as -30.  The sign is taken from a leading "-"
# so that inputs whose hour part is zero keep their sign.
function get_minutes(field, \
                     negative, whole_hours, tail)
{
  # Numeric conversion keeps only the leading (possibly signed) hour count.
  whole_hours = field + 0

  # Everything after the first colon is the minutes part, if there is one.
  tail = ""
  if (field ~ /:/) {
    tail = field
    sub(/[^:]*:/, "", tail)
  }

  negative = (field ~ /^-/)
  if (negative)
    return whole_hours * 60 - tail
  return whole_hours * 60 + tail
}
37
# Format OFFSET, a count of minutes such as 300 or 330, as a %z-style
# abbreviation: "+05" when it is a whole number of hours, "+0530" otherwise.
# (The local SIGN is declared but not used.)
function offset_abbr(offset, \
                     hh, mm, sign)
{
  mm = offset % 60
  hh = int(offset / 60)

  # Whole hours need only the two-digit form.
  if (!mm)
    return sprintf("%+.2d", hh)

  return sprintf("%+.4d", 100 * hh + mm)
}
51
# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second and
# return it as [-]h:mm:ss.  An exact half second is rounded to the even
# second.  A TIMESTAMP without a fractional part (or not of the form
# hh:mm:ss.dddd) is returned unchanged.
function round_to_second(timestamp, \
                         hh, mm, ss, seconds, dot_dddd, subseconds)
{
  # Isolate the fraction; if there is none, there is nothing to round.
  dot_dddd = timestamp
  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
    return timestamp

  # Strip the leading parts so that numeric conversion of each string
  # yields the hours, minutes and seconds respectively.
  hh = mm = ss = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  sub(/^[-+]?[0-9]+:/, "", mm)
  sub(/^[-+]?/, "", hh)

  # SS still carries its fraction ("44.68"), so truncate it: the total
  # must be a whole number of seconds for the parity test below to be
  # meaningful.  The sign was stripped, so this is the magnitude.
  seconds = 3600 * hh + 60 * mm + int(ss)
  subseconds = +dot_dddd

  # Round up when past the half; on an exact half, round up only if
  # that makes the result even.
  if (0.5 < subseconds || (subseconds == 0.5 && seconds % 2))
    seconds++

  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
                 int(seconds / 3600), int(seconds / 60) % 60, seconds % 60)
}
69
# Startup: record the valid DATAFORM values, load the optional packrat
# list, and fail if the command line did not set a valid DATAFORM.
BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  # If PACKRATLIST names a file, remember the third field of each of its
  # non-comment lines in packratlist[].
  if (PACKRATLIST) {
    # getline yields 1 per record, 0 at end of file and -1 on error, so
    # compare against 0: a bare "while (getline <file)" never terminates
    # when the file cannot be read.
    while ((packratlist_status = (getline <PACKRATLIST)) > 0) {
      if ($0 ~ /^#/) continue
      packratlist[$3] = 1
    }
    close(PACKRATLIST)
    # An unreadable list is an error, reported like a bad DATAFORM.
    if (packratlist_status < 0) exit 1
  }

  # The command line should set DATAFORM.
  if (!dataform_type[DATAFORM]) exit 1
}
85
# A line starting with "#PACKRATLIST <file>", where <file> equals the
# PACKRATLIST variable, is activated by stripping that two-word prefix.
{
  if ($1 == "#PACKRATLIST" && $2 == PACKRATLIST) {
    sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
  }
}

# Remember the name of the zone whose lines are being read.
$0 ~ /^Zone/ {
  zone = $2
}
91
# Per-line conversion applied whenever the requested DATAFORM is not "main".
# Each section below handles one construct: it either chooses between a
# commented-out and an active variant of a line, or edits the line text.
DATAFORM != "main" {
  # in_comment is 1 for a "#"-prefixed line, 0 otherwise.  It is also used
  # as a field-index offset below: in the continuation lines matched here
  # the "#" forms a field of its own, shifting the other fields by one.
  in_comment = $0 ~ /^#/
  uncomment = comment_out = 0

  # If this line should differ due to Czechoslovakia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
      && $0 ~ /1947 Feb 23/) {
    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Ireland using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  Zone_Dublin_post_1968 \
    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  if (Rule_Eire || Zone_Dublin_post_1968) {
    if ((Rule_Eire \
         || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Namibia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  Zone_using_Namibia_rule \
    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
       && ($(in_comment + 2) == "Namibia" \
           || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
               && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
                   || in_comment + 3 == NF))))
  if (Rule_Namibia || Zone_using_Namibia_rule) {
    if ((Rule_Namibia \
         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Portugal benefiting from %z if supported,
  # comment out the undesired version and uncomment the desired one.
  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+((Port|W-Eur)[\t ]+[%+-]|-[\t ]+(%z|-01)[\t ]+1982 Mar 28)/) {
    if (($0 ~ /%z/) == (DATAFORM == "rearguard")) {
      comment_out = !in_comment
    } else {
      uncomment = in_comment
    }
  }

  # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
  # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
  # This works around a bug in TZUpdater 2.3.2.
  if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
    if (($2 == "GMT") == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # Apply the decision made above.  Editing $0 re-splits the fields, so
  # the sections below see the line in its final commented/uncommented state.
  if (uncomment) {
    sub(/^#/, "")
  }
  if (comment_out) {
    sub(/^/, "#")
  }

  # Prefer explicit abbreviations in rearguard form, %z otherwise.
  if (DATAFORM == "rearguard") {
    if ($0 ~ /^[^#]*%z/) {
      # STDOFF is field 3 on a Zone line and field 1 on a continuation line;
      # the RULES field follows it.
      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
      rules_column = stdoff_column + 1
      stdoff = get_minutes($stdoff_column)
      rules = $rules_column
      stdabbr = offset_abbr(stdoff)
      if (rules == "-") {
        abbr = stdabbr
      } else {
        # A RULES field that looks like a number is a fixed SAVE amount,
        # so only the DST abbreviation is needed.
        dstabbr_only = rules ~ /^[+0-9-]/
        if (dstabbr_only) {
          dstoff = get_minutes(rules)
        } else {
          # The DST offset is normally an hour, but there are special cases.
          if (rules == "Morocco" && NF == 3) {
            dstoff = -60
          } else if (rules == "NBorneo") {
            dstoff = 20
          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
                     || (rules == "Uruguay" \
                         && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
            dstoff = 30
          } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
            dstoff = 90
          } else {
            dstoff = 60
          }
        }
        dstabbr = offset_abbr(stdoff + dstoff)
        if (dstabbr_only) {
          abbr = dstabbr
        } else {
          abbr = stdabbr "/" dstabbr
        }
      }
      sub(/%z/, abbr)
    }
  } else {
    # Mark a FORMAT field that starts with a sign by appending
    # "CHANGE-TO-%z", undo the mark for "-00", then replace each remaining
    # marked abbreviation with "%z".
    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
        "&CHANGE-TO-%z")
    sub(/-00CHANGE-TO-%z/, "-00")
    sub(/[-+][^\t ]+CHANGE-TO-/, "")
  }

  # Normally, prefer whole seconds.  However, prefer subseconds
  # if generating vanguard form and the otherwise-undocumented
  # VANGUARD_SUBSECONDS environment variable is set.
  # This relies on #STDOFF comment lines in the data.
  # It is for hypothetical clients that support UT offsets that are
  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  # No known clients need this currently, and this experimental
  # feature may be changed or withdrawn in future releases.
  if ($1 == "#STDOFF") {
    # stdoff_subst[0] is the text to look for in following lines and
    # stdoff_subst[1] is what to replace it with.
    stdoff = $2
    rounded_stdoff = round_to_second(stdoff)
    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
      stdoff_subst[0] = rounded_stdoff
      stdoff_subst[1] = stdoff
    } else {
      stdoff_subst[0] = stdoff
      stdoff_subst[1] = rounded_stdoff
    }
  } else if (stdoff_subst[0]) {
    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
    stdoff_column_val = $stdoff_column
    if (stdoff_column_val == stdoff_subst[0]) {
      # Note: stdoff_subst[0] is used here as a dynamic regular expression,
      # so a "." in it matches any character.
      sub(stdoff_subst[0], stdoff_subst[1])
    } else if (stdoff_column_val != stdoff_subst[1]) {
      # The line uses neither form of the offset: stop substituting.
      stdoff_subst[0] = 0
    }
  }

  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
        sub(/Sat>=8/, "Sun>=9")
        sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
        sub(/Sun>=9/, "Sat>=8")
        sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
        if (DATAFORM == "rearguard") {
          sub(/\t2018\t/, "\t2017\t")
        } else {
          sub(/\t2017\t/, "\t2018\t")
        }
      }

      if (2019 <= $3) {
        if ($8 == "2:00") {
          if (DATAFORM == "rearguard") {
            sub(/\t0\t/, "\t1:00\t")
          } else {
            sub(/\t1:00\t/, "\t0\t")
          }
        } else {
          if (DATAFORM == "rearguard") {
            sub(/\t-1:00\t/, "\t0\t")
          } else {
            sub(/\t0\t/, "\t-1:00\t")
          }
        }
      }
    }
    # A three-field continuation line "STDOFF Morocco FORMAT".
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
        sub(/1:00\tMorocco/, "0:00\tMorocco")
        sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
        sub(/0:00\tMorocco/, "1:00\tMorocco")
        # Both "+" signs must be escaped: an unescaped "+" after "\/" is a
        # quantifier on the slash and would never match "+00/+01".
        sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}
300
# At each Zone line, decide whether the zone is to be dropped: it is
# dropped when the line comes from the PACKRATDATA file, a PACKRATLIST
# was given, and the zone's name is not in that list.
$0 ~ /^Zone/ {
  packrat_ignored = (FILENAME == PACKRATDATA && PACKRATLIST \
                     && !packratlist[$2])
}

# While a dropped zone is in effect, comment out every line except
# Rule lines.
packrat_ignored && $0 !~ /^Rule/ {
  sub(/^/, "#")
}
309
# Build the Link line that results from making LINKNAME point to TARGET
# instead of OLDTARGET.  OLDLINE is the existing line; when it has the
# usual "Link<TAB>OLDTARGET<TAB>..." shape, the tabs before LINKNAME are
# adjusted so that LINKNAME stays in the same column.  Any white space
# followed by a comment in OLDLINE is dropped and COMMENT is appended.
function make_linkline(oldline, target, linkname, oldtarget, comment, \
                       head, headlen, oldstops, \
                       tail, newstops)
{
  head = "Link\t" oldtarget "\t"
  headlen = length(head)

  # Odd format line; don't bother lining up its replacement nicely.
  if (substr(oldline, 1, headlen) != head) {
    return "Link\t" target "\t" linkname comment
  }

  # Keep what followed the old target, minus any trailing comment.
  tail = substr(oldline, headlen + 1)
  sub(/[\t ]*#.*/, "", tail)

  # Number of 8-column tab stops each target name reaches.
  oldstops = int(length(oldtarget) / 8) + 1
  newstops = int(length(target) / 8) + 1

  # A shorter target needs extra tabs to reach the old column ...
  while (newstops < oldstops) {
    tail = "\t" tail
    newstops++
  }
  # ... and a longer one gives up leading tabs, as far as there are any.
  while (oldstops < newstops && tail ~ /^\t/) {
    tail = substr(tail, 2)
    newstops--
  }

  return "Link\t" target "\t" tail comment
}
338
# In vanguard form, a line "Link TARGET LINKNAME #= REALTARGET" is
# rewritten to link LINKNAME to REALTARGET (field 5) directly.
$0 ~ /^Link/ {
  if ($4 == "#=" && DATAFORM == "vanguard") {
    $0 = make_linkline($0, $5, $3, $2)
  }
}
342
# When a Zone or Link line defines a name that an earlier Link line already
# defined, comment out that earlier Link line.  This can happen if backzone
# overrides a Link with a Zone or a different Link.
$0 ~ /^Zone/ {
  sub(/^Link/, "#Link", line[linkline[$2]])
}
$0 ~ /^Link/ {
  sub(/^Link/, "#Link", line[linkline[$3]])
  # Record this link's target and where the link was seen.
  linktarget[$3] = $2
  linkline[$3] = NR
}

# Buffer every (possibly edited) line; output happens in END.
{
  line[NR] = $0
}
356
# For every link whose target is itself a link name, rewrite the buffered
# line "Link TARGET LINKNAME" as "Link T LINKNAME #= TARGET", where T is
# at the end of the chain of links that LINKNAME points to.
# Reads linktarget[] and linkline[]; updates line[].
function cut_link_chains_short( \
                               l, linkname, t, target, u)
{
  for (linkname in linktarget) {
    target = linktarget[linkname]

    # Test membership with "in" rather than by reading linktarget[target]:
    # a plain read would insert a new element into the array that is
    # being iterated over.
    if (!(target in linktarget))
      continue
    t = linktarget[target]
    if (t) {
      # TARGET is itself a link name; follow the chain to its end.
      while ((t in linktarget) && (u = linktarget[t])) {
        t = u
      }
      l = linkline[linkname]
      line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
    }
  }
}
375
# After all input has been read: shorten link chains unless vanguard
# form was requested, then print the buffered lines in input order.
END {
  if (!(DATAFORM == "vanguard"))
    cut_link_chains_short()

  i = 1
  while (i <= NR) {
    print line[i]
    i++
  }
}
382}
383