# Convert tzdata source into vanguard or rearguard form.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# It just converts from current source to main, vanguard, and rearguard forms.
# Although it might be nice for it to be idempotent, or to be useful
# for converting back and forth between vanguard and rearguard formats,
# it does not do these nonessential tasks now.
#
# Although main and vanguard forms are currently equivalent,
# this need not always be the case.  When the two forms differ,
# this script can convert either from main to vanguard form (needed then),
# or from vanguard to main form (this conversion would be needed later,
# after main became rearguard and vanguard became main).
# There is no need to convert rearguard to other forms.
#
# When converting to vanguard form, the output can use the line
# "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
#
# When converting to vanguard form, the output can use negative SAVE
# values.
#
# When converting to rearguard form, the output uses only nonnegative
# SAVE values.  The idea is for the output data to simulate the behavior
# of the input data as best it can within the constraints of the
# rearguard format.

# Convert FIELD, an "[-]h[:mm]" string such as "-0:30", to a signed
# count of minutes such as -30.  The names after FIELD are awk-style
# locals; callers pass FIELD only.
function get_minutes(field, \
                     colon, hh, mm, direction)
{
  # Numeric conversion stops at the first ":", leaving the signed hours.
  hh = +field

  # Whatever follows the first colon is the magnitude of the minutes.
  # With no colon MM stays unset and therefore counts as zero.
  colon = index(field, ":")
  if (colon) {
    mm = substr(field, colon + 1)
  }

  # The sign is read from the text, not from HH, so that "-0:30"
  # still produces a negative result.
  if (field ~ /^-/) {
    direction = -1
  } else {
    direction = 1
  }

  return 60 * hh + direction * mm
}

# Given an OFFSET, which is a minute count like 300 or 330,
# return a %z-style abbreviation like "+05" or "+0530".
function offset_abbr(offset, \
                     hours, minutes)
{
  # int() truncates toward zero and % keeps the sign of OFFSET, so HOURS
  # and MINUTES have the same sign and can be combined into one hhmm number.
  hours = int(offset / 60)
  minutes = offset % 60
  if (minutes) {
    return sprintf("%+.4d", hours * 100 + minutes);
  } else {
    return sprintf("%+.2d", hours)
  }
}

# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
# A fraction of exactly one half rounds to the even second.
# A TIMESTAMP that lacks a fractional part is returned unchanged.
function round_to_second(timestamp, \
                         hh, mm, ss, seconds, dot_dddd, subseconds)
{
  dot_dddd = timestamp
  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
    return timestamp
  hh = mm = ss = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  sub(/^[-+]?[0-9]+:/, "", mm)
  sub(/^[-+]?/, "", hh)
  # HH and MM still have trailing ":..." text, but numeric conversion stops
  # at the first colon.  SS still carries its fraction, so truncate it:
  # SECONDS must be a whole number or "seconds % 2" below would be
  # nonzero for every tie and ties would always round up.
  seconds = 3600 * hh + 60 * mm + int(ss)
  subseconds = +dot_dddd
  seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2));
  # SECONDS is a magnitude; the sign is reattached from TIMESTAMP.
  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
                 seconds / 3600, seconds / 60 % 60, seconds % 60)
}

BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  if (PACKRATLIST) {
    # Record the third field of each non-comment line of PACKRATLIST.
    # Compare against 0 explicitly: getline yields -1 on a read error,
    # which is true and would otherwise make this loop run forever.
    while ((getline < PACKRATLIST) > 0) {
      if ($0 ~ /^#/) continue
      packratlist[$3] = 1
    }
  }

  # The command line should set DATAFORM.
  if (!dataform_type[DATAFORM]) exit 1
}

# Activate a line marked "#PACKRATLIST file" when FILE is the PACKRATLIST
# in use, by removing the marker and the file name.
$1 == "#PACKRATLIST" && $2 == PACKRATLIST {
  sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
}

# Remember the most recent Zone name, for use on continuation lines.
/^Zone/ { zone = $2 }

DATAFORM != "main" {
  # IN_COMMENT is 1 if the line starts with "#", and is added to field
  # numbers below to compensate for the field shift on "#"-prefixed
  # continuation lines.
  in_comment = $0 ~ /^#/
  uncomment = comment_out = 0

  # If this line should differ due to Czechoslovakia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
      && $0 ~ /1947 Feb 23/) {
    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Ireland using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  Zone_Dublin_post_1968 \
    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  if (Rule_Eire || Zone_Dublin_post_1968) {
    if ((Rule_Eire \
         || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Namibia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  Zone_using_Namibia_rule \
    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
       && ($(in_comment + 2) == "Namibia" \
           || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
               && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
                   || in_comment + 3 == NF))))
  if (Rule_Namibia || Zone_using_Namibia_rule) {
    if ((Rule_Namibia \
         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Portugal benefiting from %z if supported,
  # uncomment the desired version and comment out the undesired one.
  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
    if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # In vanguard form, use the line "Zone GMT 0 - GMT" instead of
  # "Zone Etc/GMT 0 - GMT" and adjust Link lines accordingly.
  # This works around a bug in TZUpdater 2.3.2.
  if (/^#?(Zone|Link)[\t ]+(Etc\/)?GMT[\t ]/) {
    if (($2 == "GMT") == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # Apply whatever the checks above decided.
  if (uncomment) {
    sub(/^#/, "")
  }
  if (comment_out) {
    sub(/^/, "#")
  }

  # Prefer %z in vanguard form, explicit abbreviations otherwise.
  if (DATAFORM == "vanguard") {
    # Tag a signed abbreviation in the FORMAT column, restore "-00"
    # unchanged, then replace any other tagged abbreviation with %z.
    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
        "&CHANGE-TO-%z")
    sub(/-00CHANGE-TO-%z/, "-00")
    sub(/[-+][^\t ]+CHANGE-TO-/, "")
  } else {
    if ($0 ~ /^[^#]*%z/) {
      # STDOFF is field 3 on a Zone line and field 1 on a continuation line.
      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
      rules_column = stdoff_column + 1
      stdoff = get_minutes($stdoff_column)
      rules = $rules_column
      stdabbr = offset_abbr(stdoff)
      if (rules == "-") {
        abbr = stdabbr
      } else {
        # A RULES field that looks like a number is a fixed amount of
        # saving, so only the daylight abbreviation is needed.
        dstabbr_only = rules ~ /^[+0-9-]/
        if (dstabbr_only) {
          dstoff = get_minutes(rules)
        } else {
          # The DST offset is normally an hour, but there are special cases.
          if (rules == "Morocco" && NF == 3) {
            dstoff = -60
          } else if (rules == "NBorneo") {
            dstoff = 20
          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
                     || (rules == "Uruguay" \
                         && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
            dstoff = 30
          } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
            dstoff = 90
          } else {
            dstoff = 60
          }
        }
        dstabbr = offset_abbr(stdoff + dstoff)
        if (dstabbr_only) {
          abbr = dstabbr
        } else {
          abbr = stdabbr "/" dstabbr
        }
      }
      sub(/%z/, abbr)
    }
  }

  # Normally, prefer whole seconds.  However, prefer subseconds
  # if generating vanguard form and the otherwise-undocumented
  # VANGUARD_SUBSECONDS environment variable is set.
  # This relies on #STDOFF comment lines in the data.
  # It is for hypothetical clients that support UT offsets that are
  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  # No known clients need this currently, and this experimental
  # feature may be changed or withdrawn in future releases.
  if ($1 == "#STDOFF") {
    # stdoff_subst[0] is the text to look for in later lines and
    # stdoff_subst[1] is its replacement.
    stdoff = $2
    rounded_stdoff = round_to_second(stdoff)
    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
      stdoff_subst[0] = rounded_stdoff
      stdoff_subst[1] = stdoff
    } else {
      stdoff_subst[0] = stdoff
      stdoff_subst[1] = rounded_stdoff
    }
  } else if (stdoff_subst[0]) {
    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
    stdoff_column_val = $stdoff_column
    if (stdoff_column_val == stdoff_subst[0]) {
      sub(stdoff_subst[0], stdoff_subst[1])
    } else if (stdoff_column_val != stdoff_subst[1]) {
      # This line uses some other STDOFF, so stop substituting.
      stdoff_subst[0] = 0
    }
  }

  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
        sub(/Sat>=8/, "Sun>=9")
        sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
        sub(/Sun>=9/, "Sat>=8")
        sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
        if (DATAFORM == "rearguard") {
          sub(/\t2018\t/, "\t2017\t")
        } else {
          sub(/\t2017\t/, "\t2018\t")
        }
      }

      if (2019 <= $3) {
        if ($8 == "2:00") {
          if (DATAFORM == "rearguard") {
            sub(/\t0\t/, "\t1:00\t")
          } else {
            sub(/\t1:00\t/, "\t0\t")
          }
        } else {
          if (DATAFORM == "rearguard") {
            sub(/\t-1:00\t/, "\t0\t")
          } else {
            sub(/\t0\t/, "\t-1:00\t")
          }
        }
      }
    }
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
        sub(/1:00\tMorocco/, "0:00\tMorocco")
        sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
        sub(/0:00\tMorocco/, "1:00\tMorocco")
        # Both plus signs must be escaped; an unescaped "+" after "\/"
        # would mean "one or more slashes" and never match "+00/+01".
        sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}

# When reading PACKRATDATA with a PACKRATLIST in effect, comment out each
# Zone (and its continuation lines) that is not named in the list.
# Rule lines are always kept.
/^Zone/ {
  packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
}
{
  if (packrat_ignored && $0 !~ /^Rule/) {
    sub(/^/, "#")
  }
}

# Return a link line resulting by changing OLDLINE to link to TARGET
# from LINKNAME, instead of linking to OLDTARGET from LINKNAME.
# Align data columns the same as they were in OLDLINE.
# Also, replace any existing white space followed by comment with COMMENT.
# COMMENT may be omitted, in which case no comment is appended.
function make_linkline(oldline, target, linkname, oldtarget, comment, \
                       oldprefix, oldprefixlen, oldtargettabs, \
                       replsuffix, targettabs)
{
  oldprefix = "Link\t" oldtarget "\t"
  oldprefixlen = length(oldprefix)
  if (substr(oldline, 1, oldprefixlen) == oldprefix) {
    # Use tab stops to preserve LINKNAME's column.
    replsuffix = substr(oldline, oldprefixlen + 1)
    sub(/[\t ]*#.*/, "", replsuffix)
    oldtargettabs = int(length(oldtarget) / 8) + 1
    targettabs = int(length(target) / 8) + 1
    # A shorter TARGET needs extra tabs to reach the old column ...
    for (; targettabs < oldtargettabs; targettabs++) {
      replsuffix = "\t" replsuffix
    }
    # ... and a longer one gives up leading tabs, while any remain.
    for (; oldtargettabs < targettabs && replsuffix ~ /^\t/; targettabs--) {
      replsuffix = substr(replsuffix, 2)
    }
  } else {
    # Odd format line; don't bother lining up its replacement nicely.
    replsuffix = linkname
  }
  return "Link\t" target "\t" replsuffix comment
}

# In vanguard form, a "Link T LINKNAME #= TARGET" line is rewritten
# to link directly to TARGET, dropping the "#=" comment.
/^Link/ && $4 == "#=" && DATAFORM == "vanguard" {
  $0 = make_linkline($0, $5, $3, $2)
}

# If a Link line is followed by a Link or Zone line for the same data, comment
# out the Link line.  This can happen if backzone overrides a Link
# with a Zone or a different Link.
/^Zone/ {
  sub(/^Link/, "#Link", line[linkline[$2]])
}
/^Link/ {
  sub(/^Link/, "#Link", line[linkline[$3]])
  linkline[$3] = NR
  linktarget[$3] = $2
}

# Buffer every (possibly edited) line; all output happens in END.
{ line[NR] = $0 }

# Rewrite each buffered Link line whose target is itself a link name so
# that it points at the final target of the chain.
function cut_link_chains_short( \
                               l, linkname, t, target, u)
{
  for (linkname in linktarget) {
    target = linktarget[linkname]
    t = linktarget[target]
    if (t) {
      # TARGET is itself a link name.  Replace the line "Link TARGET LINKNAME"
      # with "Link T LINKNAME #= TARGET", where T is at the end of the chain
      # of links that LINKNAME points to.
      while ((u = linktarget[t])) {
        t = u
      }
      l = linkline[linkname]
      line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
    }
  }
}

END {
  if (DATAFORM != "vanguard") {
    cut_link_chains_short()
  }
  for (i = 1; i <= NR; i++)
    print line[i]
}