# Convert tzdata source into vanguard or rearguard form.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# It just converts from current source to main, vanguard, and rearguard forms.
# Although it might be nice for it to be idempotent, or to be useful
# for converting back and forth between formats,
# it does not do these nonessential tasks now.
#
# This script can convert from main to vanguard form and vice versa.
# There is no need to convert rearguard to other forms.
#
# When converting to vanguard form, the output can use the line
# "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
#
# When converting to vanguard form, the output can use negative SAVE
# values.
#
# When converting to rearguard form, the output uses only nonnegative
# SAVE values.  The idea is for the output data to simulate the behavior
# of the input data as best it can within the constraints of the
# rearguard format.

# Convert FIELD, an [-]hh[:mm] string such as "-0:30" or "5:30", into a
# signed minute count (-30 and 330 respectively).  A FIELD without a
# colon is taken as a count of hours.  SIGN, HOURS and MINUTES are locals.
function get_minutes(field, \
                     sign, hours, minutes)
{
  hours = +field
  if (index(field, ":")) {
    # Keep only what follows the first colon.
    minutes = field
    sub(/[^:]*:/, "", minutes)
  }
  # The sign is taken from the text, not from HOURS, so that "-0:30"
  # still yields a negative result.
  if (field ~ /^-/) {
    sign = -1
  } else {
    sign = 1
  }
  return 60 * hours + sign * minutes
}

# Format OFFSET, a minute count such as 300 or 330, as a %z-style
# abbreviation: "+05" when it is a whole number of hours, "+0530"
# otherwise.  The result always carries an explicit sign.
function offset_abbr(offset, \
                     hours, minutes, sign)
{
  minutes = offset % 60
  hours = int(offset / 60)
  if (!minutes)
    return sprintf("%+.2d", hours)
  return sprintf("%+.4d", hours * 100 + minutes)
}

# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
function round_to_second(timestamp, \
                         hh, mm, ss, seconds, dot_dddd, subseconds)
{
  # Isolate the fractional part, e.g. ".68".  A TIMESTAMP that is not of
  # the form [+-]hh:mm:ss.dddd is returned unchanged.
  dot_dddd = timestamp
  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
    return timestamp

  # Strip leading fields so that numeric conversion of HH, MM and SS
  # yields the hours, minutes and seconds respectively.
  hh = mm = ss = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  sub(/^[-+]?[0-9]+:/, "", mm)
  sub(/^[-+]?/, "", hh)

  # SS still carries the fraction (e.g. "44.68"), so truncate it.
  # Otherwise SECONDS % 2 below could never be zero and an exact tie
  # would always round up instead of rounding to even.
  seconds = 3600 * hh + 60 * mm + int(ss)
  subseconds = +dot_dddd

  # Round to nearest, with ties going to the even second.
  seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2))

  # The magnitude was rounded above; reattach a leading "-" if present.
  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
                 seconds / 3600, seconds / 60 % 60, seconds % 60)
}

# Validate DATAFORM and, if PACKRATLIST names a file, record the third
# field of each of its non-comment lines as a key of packratlist[].
BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  if (PACKRATLIST) {
    # Compare getline's result with 0: it returns -1 on a read error,
    # which a bare truth test would treat as true and loop forever.
    while ((getline <PACKRATLIST) > 0) {
      if ($0 ~ /^#/) continue
      packratlist[$3] = 1
    }
  }

  # The command line should set DATAFORM.
  if (!dataform_type[DATAFORM]) exit 1
}

# A line of the form "#PACKRATLIST <file> <text>" whose <file> equals
# PACKRATLIST is replaced by <text>, i.e., it is uncommented.
$1 == "#PACKRATLIST" && $2 == PACKRATLIST {
  sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
}

# Remember the name of the most recent Zone for the rules below.
/^Zone/ { zone = $2 }

# Rewrite the current line for vanguard or rearguard form.
DATAFORM != "main" {
  in_comment = $0 ~ /^#/
  uncomment = comment_out = 0

  # If this line should differ due to Czechoslovakia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
      && $0 ~ /1947 Feb 23/) {
    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Ireland using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  Zone_Dublin_post_1968 \
    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  if (Rule_Eire || Zone_Dublin_post_1968) {
    if ((Rule_Eire \
         || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Namibia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  Zone_using_Namibia_rule \
    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
       && ($(in_comment + 2) == "Namibia" \
           || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
               && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
                   || in_comment + 3 == NF))))
  if (Rule_Namibia || Zone_using_Namibia_rule) {
    if ((Rule_Namibia \
         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Portugal benefiting from %z if supported,
  # comment out the undesired version and uncomment the desired one.
  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+((Port|W-Eur)[\t ]+[%+-]|-[\t ]+(%z|-01)[\t ]+1982 Mar 28)/) {
    if (($0 ~ /%z/) == (DATAFORM == "rearguard")) {
      comment_out = !in_comment
    } else {
      uncomment = in_comment
    }
  }

  # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
  # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
  # This works around a bug in TZUpdater 2.3.2.
  if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
    if (($2 == "GMT") == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # Apply whatever the checks above decided.
  if (uncomment) {
    sub(/^#/, "")
  }
  if (comment_out) {
    sub(/^/, "#")
  }

  # Prefer explicit abbreviations in rearguard form, %z otherwise.
  if (DATAFORM == "rearguard") {
    if ($0 ~ /^[^#]*%z/) {
      # STDOFF is field 3 on a Zone line and field 1 on a continuation line.
      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
      rules_column = stdoff_column + 1
      stdoff = get_minutes($stdoff_column)
      rules = $rules_column
      stdabbr = offset_abbr(stdoff)
      if (rules == "-") {
        abbr = stdabbr
      } else {
        # A RULES field that starts like a number is a fixed amount of
        # saved time, so only the DST abbreviation applies.
        dstabbr_only = rules ~ /^[+0-9-]/
        if (dstabbr_only) {
          dstoff = get_minutes(rules)
        } else {
          # The DST offset is normally an hour, but there are special cases.
          if (rules == "Morocco" && NF == 3) {
            dstoff = -60
          } else if (rules == "NBorneo") {
            dstoff = 20
          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
                     || (rules == "Uruguay" \
                         && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
            dstoff = 30
          } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
            dstoff = 90
          } else {
            dstoff = 60
          }
        }
        dstabbr = offset_abbr(stdoff + dstoff)
        if (dstabbr_only) {
          abbr = dstabbr
        } else {
          abbr = stdabbr "/" dstabbr
        }
      }
      sub(/%z/, abbr)
    }
  } else {
    # Mark a FORMAT field that starts with a sign, keep "-00" as is,
    # and turn any other marked field into plain "%z".
    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
        "&CHANGE-TO-%z")
    sub(/-00CHANGE-TO-%z/, "-00")
    sub(/[-+][^\t ]+CHANGE-TO-/, "")
  }

  # Normally, prefer whole seconds.  However, prefer subseconds
  # if generating vanguard form and the otherwise-undocumented
  # VANGUARD_SUBSECONDS environment variable is set.
  # This relies on #STDOFF comment lines in the data.
  # It is for hypothetical clients that support UT offsets that are
  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  # No known clients need this currently, and this experimental
  # feature may be changed or withdrawn in future releases.
  if ($1 == "#STDOFF") {
    stdoff = $2
    rounded_stdoff = round_to_second(stdoff)
    # stdoff_subst[0] is the text to look for in following lines and
    # stdoff_subst[1] is its replacement.
    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
      stdoff_subst[0] = rounded_stdoff
      stdoff_subst[1] = stdoff
    } else {
      stdoff_subst[0] = stdoff
      stdoff_subst[1] = rounded_stdoff
    }
  } else if (stdoff_subst[0]) {
    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
    stdoff_column_val = $stdoff_column
    if (stdoff_column_val == stdoff_subst[0]) {
      sub(stdoff_subst[0], stdoff_subst[1])
    } else if (stdoff_column_val != stdoff_subst[1]) {
      # This line's STDOFF is neither form, so stop substituting.
      stdoff_subst[0] = 0
    }
  }

  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
        sub(/Sat>=8/, "Sun>=9")
        sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
        sub(/Sun>=9/, "Sat>=8")
        sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
        if (DATAFORM == "rearguard") {
          sub(/\t2018\t/, "\t2017\t")
        } else {
          sub(/\t2017\t/, "\t2018\t")
        }
      }

      if (2019 <= $3) {
        if ($8 == "2:00") {
          if (DATAFORM == "rearguard") {
            sub(/\t0\t/, "\t1:00\t")
          } else {
            sub(/\t1:00\t/, "\t0\t")
          }
        } else {
          if (DATAFORM == "rearguard") {
            sub(/\t-1:00\t/, "\t0\t")
          } else {
            sub(/\t0\t/, "\t-1:00\t")
          }
        }
      }
    }
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
        sub(/1:00\tMorocco/, "0:00\tMorocco")
        sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
        sub(/0:00\tMorocco/, "1:00\tMorocco")
        # Both "+" signs must be escaped; an unescaped "+" after the
        # slash would mean "one or more slashes" and never match.
        sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}

# A Zone line read from PACKRATDATA is ignored when a PACKRATLIST was
# given and does not list the zone.  The flag stays in effect for the
# lines that follow, until the next Zone line.
/^Zone/ {
  packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
}
# Comment out every line of an ignored zone except Rule lines.
{
  if (packrat_ignored && $0 !~ /^Rule/) {
    sub(/^/, "#")
  }
}

# Return a link line resulting from changing OLDLINE to link to TARGET
# from LINKNAME, instead of linking to OLDTARGET from LINKNAME.
# Align data columns the same as they were in OLDLINE.
# Also, replace any existing white space followed by comment with COMMENT.
# COMMENT may be omitted, in which case nothing is appended.
function make_linkline(oldline, target, linkname, oldtarget, comment, \
                       oldprefix, oldprefixlen, oldtargettabs, \
                       replsuffix, targettabs)
{
  oldprefix = "Link\t" oldtarget "\t"
  oldprefixlen = length(oldprefix)
  if (substr(oldline, 1, oldprefixlen) == oldprefix) {
    # Use tab stops to preserve LINKNAME's column.
    replsuffix = substr(oldline, oldprefixlen + 1)
    sub(/[\t ]*#.*/, "", replsuffix)
    # Count the 8-column tab stops that each target name reaches.
    oldtargettabs = int(length(oldtarget) / 8) + 1
    targettabs = int(length(target) / 8) + 1
    # A shorter TARGET needs extra tabs before LINKNAME ...
    for (; targettabs < oldtargettabs; targettabs++) {
      replsuffix = "\t" replsuffix
    }
    # ... and a longer one gives up leading tabs, if there are any.
    for (; oldtargettabs < targettabs && replsuffix ~ /^\t/; targettabs--) {
      replsuffix = substr(replsuffix, 2)
    }
  } else {
    # Odd format line; don't bother lining up its replacement nicely.
    replsuffix = linkname
  }
  return "Link\t" target "\t" replsuffix comment
}

# In vanguard form, a line "Link T LINKNAME #= TARGET" is turned back
# into a link to TARGET, dropping the trailing comment.
/^Link/ && $4 == "#=" && DATAFORM == "vanguard" {
  $0 = make_linkline($0, $5, $3, $2)
}

# If a Link line is followed by a Link or Zone line for the same data, comment
# out the Link line.  This can happen if backzone overrides a Link
# with a Zone or a different Link.
/^Zone/ {
  sub(/^Link/, "#Link", line[linkline[$2]])
}
/^Link/ {
  sub(/^Link/, "#Link", line[linkline[$3]])
  linkline[$3] = NR
  linktarget[$3] = $2
}

# Buffer every (possibly rewritten) line; END prints them.
{ line[NR] = $0 }

# Rewrite each buffered Link line whose target is itself a link name so
# that it points at the end of its chain of links.
function cut_link_chains_short( \
                               l, linkname, t, target, u)
{
  for (linkname in linktarget) {
    target = linktarget[linkname]
    # Test membership with "in" rather than by subscripting, so that no
    # empty entries are created in linktarget while it is being iterated.
    if (!(target in linktarget))
      continue
    t = linktarget[target]
    if (!t)
      continue
    # TARGET is itself a link name.  Replace the line "Link TARGET LINKNAME"
    # with "Link T LINKNAME #= TARGET", where T is at the end of the chain
    # of links that LINKNAME points to.
    while ((t in linktarget) && (u = linktarget[t])) {
      t = u
    }
    l = linkline[linkname]
    line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
  }
}

# Shorten link chains unless generating vanguard form, then print the
# buffered lines in input order.
END {
  if (DATAFORM != "vanguard") {
    cut_link_chains_short()
  }
  for (i = 1; i <= NR; i++)
    print line[i]
}