# Convert tzdata source into vanguard or rearguard form.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# It just converts from current source to main, vanguard, and rearguard forms.
# Although it might be nice for it to be idempotent, or to be useful
# for converting back and forth between vanguard and rearguard formats,
# it does not do these nonessential tasks now.
#
# Although main and vanguard forms are currently equivalent,
# this need not always be the case.  When the two forms differ,
# this script can convert either from main to vanguard form (needed then),
# or from vanguard to main form (this conversion would be needed later,
# after main became rearguard and vanguard became main).
# There is no need to convert rearguard to other forms.
#
# When converting to vanguard form, the output can use negative SAVE
# values.
#
# When converting to rearguard form, the output uses only nonnegative
# SAVE values.  The idea is for the output data to simulate the behavior
# of the input data as best it can within the constraints of the
# rearguard format.

# Given a FIELD like "-0:30", return a minute count like -30.
# FIELD has the form [-]hours[:minutes]; a leading "-" applies to the
# minutes part as well as to the hours part.
function get_minutes(field, \
                     colon_pos, tail_minutes)
{
  # Text after the first colon, if any, is the minutes part.
  # With no colon, tail_minutes stays unset and counts as zero.
  colon_pos = index(field, ":")
  if (colon_pos)
    tail_minutes = substr(field, colon_pos + 1)

  # +field converts the leading (possibly signed) hours part to a number.
  if (field ~ /^-/)
    return 60 * (+field) - tail_minutes
  return 60 * (+field) + tail_minutes
}

# Given an OFFSET, which is a minute count like 300 or 330,
# return a %z-style abbreviation like "+05" or "+0530".
# The two-digit form is used when OFFSET is a whole number of hours.
function offset_abbr(offset, \
                     hh, mm)
{
  hh = int(offset / 60)
  mm = offset % 60
  if (!mm)
    return sprintf("%+.2d", hh)
  # hh and mm share the sign of OFFSET, so hh * 100 + mm is the
  # signed decimal number whose digits read "hhmm".
  return sprintf("%+.4d", hh * 100 + mm)
}

# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
# A TIMESTAMP with no fractional part is returned unchanged.
# The magnitude is rounded and the sign is reattached afterwards;
# an exact tie (a fraction of .5) goes to the even second.
function round_to_second(timestamp, \
                         hh, mm, ss, seconds, dot_dddd, subseconds)
{
  # Isolate the fractional part ".dddd"; if there is none, nothing to do.
  dot_dddd = timestamp
  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
    return timestamp

  # Strip leading components so that each variable starts with its own
  # unsigned number; awk's numeric conversion ignores the trailing text.
  hh = mm = ss = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  sub(/^[-+]?[0-9]+:/, "", mm)
  sub(/^[-+]?/, "", hh)

  # ss still carries its fraction (e.g., "44.5"), so take int(ss).
  # Otherwise the fraction would leak into the parity test below,
  # "seconds % 2" would be nonzero for every tie, and ties would
  # always round up instead of rounding to even.
  seconds = 3600 * hh + 60 * mm + int(ss)
  subseconds = +dot_dddd
  seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2));
  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
                 seconds / 3600, seconds / 60 % 60, seconds % 60)
}

# Validate DATAFORM and, if PACKRATLIST names a file, load it.
# Each non-comment line of that file marks its third field as a name
# whose Zone is to be kept when reading PACKRATDATA.
BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  if (PACKRATLIST) {
    # getline yields 1 per line read, 0 at end of file, and -1 on error.
    # Compare against 0 explicitly: -1 is "true" in awk, so a bare
    # "while (getline <file)" would spin forever on an unreadable file.
    while ((getline_status = (getline <PACKRATLIST)) > 0) {
      if ($0 ~ /^#/) continue
      packratlist[$3] = 1
    }
    # Fail rather than silently treating an unreadable list as empty.
    if (getline_status < 0) exit 1
  }

  # The command line should set DATAFORM.
  if (!dataform_type[DATAFORM]) exit 1
}

# Activate a line of the form "#PACKRATLIST <file> <data>" when <file>
# is the PACKRATLIST in use, by stripping everything before <data>.
$1 == "#PACKRATLIST" && $2 == PACKRATLIST {
  sub(/^#PACKRATLIST[\t ]+[^\t ]+[\t ]+/, "")
}

# Remember the current zone name for the continuation lines that follow.
/^Zone/ { zone = $2 }

DATAFORM != "main" {
  # in_comment is 1 for a line starting with "#", else 0.  It doubles as
  # a field offset: on the commented-out continuation lines tested below,
  # "#" is a field of its own and shifts the other fields right by one.
  in_comment = $0 ~ /^#/
  uncomment = comment_out = 0

  # If this line should differ due to Czechoslovakia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  if (zone == "Europe/Prague" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
      && $0 ~ /1947 Feb 23/) {
    if (($(in_comment + 2) != "-") == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Ireland using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Eire = $0 ~ /^#?Rule[\t ]+Eire[\t ]/
  Zone_Dublin_post_1968 \
    = (zone == "Europe/Dublin" && $0 ~ /^#?[\t ]+[01]:00[\t ]/ \
       && (!$(in_comment + 4) || 1968 < $(in_comment + 4)))
  if (Rule_Eire || Zone_Dublin_post_1968) {
    if ((Rule_Eire \
         || (Zone_Dublin_post_1968 && $(in_comment + 3) == "IST/GMT")) \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Namibia using negative SAVE values,
  # uncomment the desired version and comment out the undesired one.
  Rule_Namibia = $0 ~ /^#?Rule[\t ]+Namibia[\t ]/
  Zone_using_Namibia_rule \
    = (zone == "Africa/Windhoek" && $0 ~ /^#?[\t ]+[12]:00[\t ]/ \
       && ($(in_comment + 2) == "Namibia" \
           || ($(in_comment + 2) == "-" && $(in_comment + 3) == "CAT" \
               && ((1994 <= $(in_comment + 4) && $(in_comment + 4) <= 2017) \
                   || in_comment + 3 == NF))))
  if (Rule_Namibia || Zone_using_Namibia_rule) {
    if ((Rule_Namibia \
         ? ($9 ~ /^-/ || ($9 == 0 && $10 == "CAT")) \
         : $(in_comment + 1) == "2:00" && $(in_comment + 2) == "Namibia") \
        == (DATAFORM != "rearguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  # If this line should differ due to Portugal benefiting from %z if supported,
  # uncomment the desired version and comment out the undesired one.
  if ($0 ~ /^#?[\t ]+-[12]:00[\t ]+Port[\t ]+[%+-]/) {
    if (($0 ~ /%z/) == (DATAFORM == "vanguard")) {
      uncomment = in_comment
    } else {
      comment_out = !in_comment
    }
  }

  if (uncomment) {
    sub(/^#/, "")
  }
  if (comment_out) {
    sub(/^/, "#")
  }

  # Prefer %z in vanguard form, explicit abbreviations otherwise.
  if (DATAFORM == "vanguard") {
    # Tag a FORMAT field that starts with a sign, leave "-00" alone,
    # then replace any other tagged abbreviation with "%z".
    sub(/^(Zone[\t ]+[^\t ]+)?[\t ]+[^\t ]+[\t ]+[^\t ]+[\t ]+[-+][^\t ]+/, \
        "&CHANGE-TO-%z")
    sub(/-00CHANGE-TO-%z/, "-00")
    sub(/[-+][^\t ]+CHANGE-TO-/, "")
  } else {
    if ($0 ~ /^[^#]*%z/) {
      # STDOFF is field 3 on a Zone line and field 1 on a continuation line.
      stdoff_column = 2 * ($0 ~ /^Zone/) + 1
      rules_column = stdoff_column + 1
      stdoff = get_minutes($stdoff_column)
      rules = $rules_column
      stdabbr = offset_abbr(stdoff)
      if (rules == "-") {
        abbr = stdabbr
      } else {
        # A RULES field that looks like a number is a fixed SAVE amount,
        # so only the daylight-saving abbreviation is needed.
        dstabbr_only = rules ~ /^[+0-9-]/
        if (dstabbr_only) {
          dstoff = get_minutes(rules)
        } else {
          # The DST offset is normally an hour, but there are special cases.
          if (rules == "Morocco" && NF == 3) {
            dstoff = -60
          } else if (rules == "NBorneo") {
            dstoff = 20
          } else if (((rules == "Cook" || rules == "LH") && NF == 3) \
                     || (rules == "Uruguay" \
                         && $0 ~ /[\t ](1942 Dec 14|1960|1970|1974 Dec 22)$/)) {
            dstoff = 30
          } else if (rules == "Uruguay" && $0 ~ /[\t ]1974 Mar 10$/) {
            dstoff = 90
          } else {
            dstoff = 60
          }
        }
        dstabbr = offset_abbr(stdoff + dstoff)
        if (dstabbr_only) {
          abbr = dstabbr
        } else {
          abbr = stdabbr "/" dstabbr
        }
      }
      sub(/%z/, abbr)
    }
  }

  # Normally, prefer whole seconds.  However, prefer subseconds
  # if generating vanguard form and the otherwise-undocumented
  # VANGUARD_SUBSECONDS environment variable is set.
  # This relies on #STDOFF comment lines in the data.
  # It is for hypothetical clients that support UT offsets that are
  # not integer multiples of one second (e.g., Europe/Lisbon, 1884 to 1912).
  # No known clients need this currently, and this experimental
  # feature may be changed or withdrawn in future releases.
  if ($1 == "#STDOFF") {
    # stdoff_subst[0] is the text to look for in the STDOFF column of
    # following lines and stdoff_subst[1] is its replacement.
    stdoff = $2
    rounded_stdoff = round_to_second(stdoff)
    if (DATAFORM == "vanguard" && ENVIRON["VANGUARD_SUBSECONDS"]) {
      stdoff_subst[0] = rounded_stdoff
      stdoff_subst[1] = stdoff
    } else {
      stdoff_subst[0] = stdoff
      stdoff_subst[1] = rounded_stdoff
    }
  } else if (stdoff_subst[0]) {
    stdoff_column = 2 * ($0 ~ /^Zone/) + 1
    stdoff_column_val = $stdoff_column
    if (stdoff_column_val == stdoff_subst[0]) {
      sub(stdoff_subst[0], stdoff_subst[1])
    } else if (stdoff_column_val != stdoff_subst[1]) {
      # This line has neither form of the offset, so stop substituting
      # until the next #STDOFF line.
      stdoff_subst[0] = 0
    }
  }

  # In rearguard form, change the Japan rule line with "Sat>=8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
        sub(/Sat>=8/, "Sun>=9")
        sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
        sub(/Sun>=9/, "Sat>=8")
        sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
        if (DATAFORM == "rearguard") {
          sub(/\t2018\t/, "\t2017\t")
        } else {
          sub(/\t2017\t/, "\t2018\t")
        }
      }

      if (2019 <= $3) {
        if ($8 == "2:00") {
          if (DATAFORM == "rearguard") {
            sub(/\t0\t/, "\t1:00\t")
          } else {
            sub(/\t1:00\t/, "\t0\t")
          }
        } else {
          if (DATAFORM == "rearguard") {
            sub(/\t-1:00\t/, "\t0\t")
          } else {
            sub(/\t0\t/, "\t-1:00\t")
          }
        }
      }
    }
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
        sub(/1:00\tMorocco/, "0:00\tMorocco")
        sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
        sub(/0:00\tMorocco/, "1:00\tMorocco")
        # Both "+" signs must be escaped.  An unescaped "\/+" means
        # "one or more slashes" and would never match "+00/+01".
        sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}

# When reading PACKRATDATA with a PACKRATLIST in effect, comment out
# every Zone not named in the list, along with its continuation lines;
# Rule lines are always kept.
/^Zone/ {
  packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
}
{
  if (packrat_ignored && $0 !~ /^Rule/) {
    sub(/^/, "#")
  }
}

# If a Link line is followed by a Link or Zone line for the same data, comment
# out the Link line.  This can happen if backzone overrides a Link
# with a Zone or a different Link.
/^Zone/ {
  sub(/^Link/, "#Link", line[linkline[$2]])
}
/^Link/ {
  sub(/^Link/, "#Link", line[linkline[$3]])
  linkline[$3] = NR
}

# Buffer every line, since a later line can still comment out an earlier Link.
{ line[NR] = $0 }

END {
  for (i = 1; i <= NR; i++)
    print line[i]
}