# Convert tzdata source into a smaller version of itself.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# 'zic' should treat this script's output as if it were identical to
# this script's input.
#
# The variables 'deps', 'version', 'dataform' and 'redo' are read but never
# assigned here; presumably the caller supplies them with -v (the BEGIN
# block describes them as configuration variables of tzdb's Makefile).
#
# Function parameters listed after a backslash-newline are local variables,
# not arguments that callers pass.

# Record a hash N for the new name NAME, checking for collisions.
# If N is already taken, print a "# ! collision" comment line naming both
# the previous owner and NAME, then exit with status 1.

function record_hash(n, name)
{
  if (used_hashes[n]) {
    printf "# ! collision: %s %s\n", used_hashes[n], name
    exit 1
  }
  used_hashes[n] = name
}

# Return a shortened rule name representing NAME,
# and record this relationship to the hash table.
# The short name is recorded via record_hash, so a clash with any
# previously recorded abbreviation aborts the run.

function gen_rule_name(name, \
                       n)
{
  # Use a simple mnemonic: the first two letters.
  n = substr(name, 1, 2)
  record_hash(n, name)
  # printf "# %s = %s\n", n, name
  return n
}

# Fill the global table rule[] with the fixed abbreviations below, then
# register every abbreviation with record_hash so that duplicates in the
# table, or later clashes with generated names, are detected.

function prehash_rule_names( \
                            name)
{
  # Rule names are not part of the tzdb API, so substitute shorter
  # ones.  Shortening them consistently from one release to the next
  # simplifies comparison of the output.  That being said, the
  # 1-letter names below are not standardized in any way, and can
  # change arbitrarily from one release to the next, as the main goal
  # here is compression not comparison.

  # Abbreviating these rule names to one letter saved the most space
  # circa 2018e.
  rule["Arg"] = "A"
  rule["Brazil"] = "B"
  rule["Canada"] = "C"
  rule["Denmark"] = "D"
  rule["EU"] = "E"
  rule["France"] = "F"
  rule["GB-Eire"] = "G"
  rule["Halifax"] = "H"
  rule["Italy"] = "I"
  rule["Jordan"] = "J"
  rule["Egypt"] = "K" # "Kemet" in ancient Egyptian
  rule["Libya"] = "L"
  rule["Morocco"] = "M"
  rule["Neth"] = "N"
  rule["Poland"] = "O" # arbitrary
  rule["Palestine"] = "P"
  rule["Cuba"] = "Q" # Its start sounds like "Q".
  rule["Russia"] = "R"
  rule["Syria"] = "S"
  rule["Turkey"] = "T"
  rule["Uruguay"] = "U"
  rule["Vincennes"] = "V"
  rule["Winn"] = "W"
  rule["Mongol"] = "X" # arbitrary
  rule["NT_YK"] = "Y"
  rule["Zion"] = "Z"
  rule["Austria"] = "a"
  rule["Belgium"] = "b"
  rule["C-Eur"] = "c"
  rule["Algeria"] = "d" # country code DZ
  rule["E-Eur"] = "e"
  rule["Taiwan"] = "f" # Formosa
  rule["Greece"] = "g"
  rule["Hungary"] = "h"
  rule["Iran"] = "i"
  rule["StJohns"] = "j"
  rule["Chatham"] = "k" # arbitrary
  rule["Lebanon"] = "l"
  rule["Mexico"] = "m"
  rule["Tunisia"] = "n" # country code TN
  rule["Moncton"] = "o" # arbitrary
  rule["Port"] = "p"
  rule["Albania"] = "q" # arbitrary
  rule["Regina"] = "r"
  rule["Spain"] = "s"
  rule["Toronto"] = "t"
  rule["US"] = "u"
  rule["Louisville"] = "v" # ville
  rule["Iceland"] = "w" # arbitrary
  rule["Chile"] = "x" # arbitrary
  rule["Para"] = "y" # country code PY
  rule["Romania"] = "z" # arbitrary
  rule["Macau"] = "_" # arbitrary

  # Use ISO 3166 alpha-2 country codes for remaining names that are countries.
  # This is more systematic, and avoids collisions (e.g., Malta and Moldova).
  rule["Armenia"] = "AM"
  rule["Aus"] = "AU"
  rule["Azer"] = "AZ"
  rule["Barb"] = "BB"
  rule["Dhaka"] = "BD"
  rule["Bulg"] = "BG"
  rule["Bahamas"] = "BS"
  rule["Belize"] = "BZ"
  rule["Swiss"] = "CH"
  rule["Cook"] = "CK"
  rule["PRC"] = "CN"
  rule["Cyprus"] = "CY"
  rule["Czech"] = "CZ"
  rule["Germany"] = "DE"
  rule["DR"] = "DO"
  rule["Ecuador"] = "EC"
  rule["Finland"] = "FI"
  rule["Fiji"] = "FJ"
  rule["Falk"] = "FK"
  rule["Ghana"] = "GH"
  rule["Guat"] = "GT"
  rule["Hond"] = "HN"
  rule["Haiti"] = "HT"
  rule["Eire"] = "IE"
  rule["Iraq"] = "IQ"
  rule["Japan"] = "JP"
  rule["Kyrgyz"] = "KG"
  rule["ROK"] = "KR"
  rule["Latvia"] = "LV"
  # NOTE(review): the ISO 3166 alpha-2 code for Luxembourg is "LU", not "LX".
  # Left unchanged here because altering it would change the emitted rule
  # names; confirm whether "LX" is intentional.
  rule["Lux"] = "LX"
  rule["Moldova"] = "MD"
  rule["Malta"] = "MT"
  rule["Mauritius"] = "MU"
  rule["Namibia"] = "NA"
  rule["Nic"] = "NI"
  rule["Norway"] = "NO"
  rule["Peru"] = "PE"
  rule["Phil"] = "PH"
  rule["Pakistan"] = "PK"
  rule["Sudan"] = "SD"
  rule["Salv"] = "SV"
  rule["Tonga"] = "TO"
  rule["Vanuatu"] = "VU"

  # Avoid collisions.
  rule["Detroit"] = "Dt" # De = Denver

  # Register every abbreviation; record_hash exits on a duplicate.
  for (name in rule) {
    record_hash(rule[name], name)
  }
}

# Return FIELD[1] through FIELD[N] joined by single spaces.

function make_line(n, field, \
                   f, r)
{
  r = field[1]
  for (f = 2; f <= n; f++)
    r = r " " field[f]
  return r
}

# Process the input line LINE and save it for later output.
#
# The line is shrunk textually, then stored in one of the global tables:
#   rule_output_line[0 .. nrule_out-1]  Rule lines, in input order
#   link_output_line[0 .. nlink_out-1]  Link lines, in input order
#   linkdef[LINKNAME] = TARGET          Link lines (vanguard format under gawk)
#   zonedef[ZONENAME]                   a Zone line plus its continuation
#                                       lines, separated by newlines
# The global 'zonename' remembers which zone continuation lines belong to,
# and rule_used[] collects the rule names that zones refer to.

function process_input_line(line, \
                            f, field, end, n, outline, r, \
                            linkline, ruleline, zoneline)
{
  # Remove comments, normalize spaces, and append a space to each line.
  # The appended space lets the patterns below assume that every field,
  # including the last one, is followed by a space.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[\t ]+/, " ", line)

  # Abbreviate keywords and determine line type.
  # sub() returns the number of substitutions made, so each flag is 1 or 0.
  linkline = sub(/^Link /, "L ", line)
  ruleline = sub(/^Rule /, "R ", line)
  zoneline = sub(/^Zone /, "Z ", line)

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.
  if (match(line, /[^ ]Asia /)) {
    # A FooAsia Rule line is dropped entirely.
    if (ruleline) return
    # RSTART is the character just before "Asia": keep everything up to
    # and including it, then resume after the four letters of "Asia".
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times.
  # First strip leading zeros from any number that follows ':' or ' ',
  # keeping the separator and the final digit of the match.
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  # Then drop each ":0" that is not followed by another ':'.
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.
  # Mon, Wed and Fri shrink to their first letter: 'end' is the position
  # just past the match, so the name occupies end-4 .. end-2 and the
  # delimiter (' ', '<' or '>') sits at end-1.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  # Sun, Tue, Thu and Sat keep two letters, since their first letters
  # are shared (Sun/Sat, Tue/Thu).
  while (match(line, / (last)?(Sun|Tue|Thu|Sat)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "min", "only" and month names.
  # Although "max" and "min" can both be abbreviated to just "m",
  # the longer forms "ma" and "mi" are needed with zic 2023d and earlier.
  # Mar, May, Jun and Jul are left unabbreviated.
  gsub(/ max /, dataform == "vanguard" ? " m " : " ma ", line)
  gsub(/ min /, dataform == "vanguard" ? " m " : " mi ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  # Only a "1" directly after a letter is removed; RSTART is that letter.
  if (match(line, /[A-Za-z] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Record which rule names are used, and generate their abbreviations.
  # The field examined is 4 on a Zone line and 2 on a zone continuation
  # line; Link and Rule lines use index 0, which split() never fills, so
  # nothing is recorded for them.
  f = zoneline ? 4 : linkline || ruleline ? 0 : 2
  r = field[f]
  # Only values that do not start with '-', '+' or a digit are recorded.
  if (r ~ /^[^-+0-9]/) {
    rule_used[r] = 1
  }

  # Track the current zone.  A line that is none of Zone, Link or Rule
  # leaves 'zonename' untouched, so it attaches to the preceding zone.
  if (zoneline)
    zonename = startdef = field[2]
  else if (linkline)
    zonename = startdef = field[3]
  else if (ruleline)
    zonename = ""

  # Save the information for later output.
  outline = make_line(n, field)
  if (ruleline)
    rule_output_line[nrule_out++] = outline
  else if (linkline) {
    # In vanguard format with Gawk, links are output sorted by destination.
    if (dataform == "vanguard" && PROCINFO["version"])
      linkdef[zonename] = field[2]
    else
      link_output_line[nlink_out++] = outline
  } else
    # A Zone line starts a fresh definition; any other line is appended
    # to the current zone's definition on a new line.
    zonedef[zonename] = (zoneline ? "" : zonedef[zonename] "\n") outline
}

# Blank out every saved Rule line whose rule name (field 2) was never
# recorded in rule_used[].  Blanked lines are skipped on output.

function omit_unused_rules( \
                           i, field)
{
  for (i = 0; i < nrule_out; i++) {
    split(rule_output_line[i], field)
    if (!rule_used[field[2]])
      rule_output_line[i] = ""
  }
}

# Replace rule names with their abbreviations, first in the saved Rule
# lines and then in the saved zone definitions.  Names missing from
# rule[] get an abbreviation from gen_rule_name, which is remembered in
# rule[] so that the zone pass below uses the same one.

function abbreviate_rule_names( \
                               abbr, f, field, i, n, newdef, newline, r, \
                               zoneline, zonelines, zonename)
{
  for (i = 0; i < nrule_out; i++) {
    n = split(rule_output_line[i], field)
    # n is 0 for lines blanked by omit_unused_rules.
    if (n) {
      r = field[2]
      if (r ~ /^[^-+0-9]/) {
        abbr = rule[r]
        if (!abbr) {
          rule[r] = abbr = gen_rule_name(r)
        }
        field[2] = abbr
        rule_output_line[i] = make_line(n, field)
      }
    }
  }
  for (zonename in zonedef) {
    zonelines = split(zonedef[zonename], zoneline, /\n/)
    newdef = ""
    for (i = 1; i <= zonelines; i++) {
      newline = zoneline[i]
      n = split(newline, field)
      # Field 4 on the first line of a definition, field 2 on the
      # continuation lines, as in process_input_line.
      f = i == 1 ? 4 : 2
      r = rule[field[f]]
      if (r) {
        field[f] = r
        newline = make_line(n, field)
      }
      newdef = (newdef ? newdef "\n" : "") newline
    }
    zonedef[zonename] = newdef
  }
}

# Print the saved lines: non-blank Rule lines first, then the zone
# definitions, then the links.

function output_saved_lines( \
                            i, zonename)
{
  for (i = 0; i < nrule_out; i++)
    if (rule_output_line[i])
      print rule_output_line[i]

  # When using gawk, output zones sorted by name.
  # This makes the output a bit more compressible.
  PROCINFO["sorted_in"] = "@ind_str_asc"
  for (zonename in zonedef)
    print zonedef[zonename]

  # Links were saved either as whole lines (link_output_line) or as
  # name -> target pairs (linkdef); see process_input_line.
  if (nlink_out)
    for (i = 0; i < nlink_out; i++)
      print link_output_line[i]
  else {
    # When using gawk, output links sorted by destination.
    # This also helps compressibility a bit.
    PROCINFO["sorted_in"] = "@val_type_asc"
    for (zonename in linkdef)
      printf "L %s %s\n", linkdef[zonename], zonename
  }
}

# Emit the header comment lines and set up the abbreviation table.

BEGIN {
  # Files that the output normally depends on.
  default_dep["africa"] = 1
  default_dep["antarctica"] = 1
  default_dep["asia"] = 1
  default_dep["australasia"] = 1
  default_dep["backward"] = 1
  default_dep["etcetera"] = 1
  default_dep["europe"] = 1
  default_dep["factory"] = 1
  default_dep["northamerica"] = 1
  default_dep["southamerica"] = 1
  default_dep["ziguard.awk"] = 1
  default_dep["zishrink.awk"] = 1

  # Output a version string from 'version' and related configuration variables
  # supported by tzdb's Makefile.  If you change the makefile or any other files
  # that affect the output of this script, you should append '-SOMETHING'
  # to the contents of 'version', where SOMETHING identifies what was changed.

  # Build 'ddeps', the difference from the default dependency list:
  # each extra file in 'deps' is listed by name, and each default file
  # missing from 'deps' is listed with a leading '!'.
  ndeps = split(deps, dep)
  ddeps = ""
  for (i = 1; i <= ndeps; i++) {
    if (default_dep[dep[i]]) {
      # Seen: raise the count above 1 so the loop below skips it.
      default_dep[dep[i]]++
    } else {
      ddeps = ddeps " " dep[i]
    }
  }
  for (d in default_dep) {
    if (default_dep[d] == 1) {
      ddeps = ddeps " !" d
    }
  }
  print "# version", version
  if (dataform != "main") {
    print "# dataform", dataform
  }
  if (redo != "posix_right") {
    print "# redo " redo
  }
  if (ddeps) {
    # ddeps already starts with a space.
    print "# ddeps" ddeps
  }
  print "# This zic input file is in the public domain."

  prehash_rule_names()
}

# Process every line that has something other than blanks and a comment.

/^[\t ]*[^#\t ]/ {
  process_input_line($0)
}

END {
  omit_unused_rules()
  abbreviate_rule_names()
  output_saved_lines()
}