# Convert tzdata source into a smaller version of itself.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# 'zic' should treat this script's output as if it were identical to
# this script's input.

# Record a hash N for the new name NAME, checking for collisions.
#
# N is the abbreviated rule name and NAME the full rule name it stands for.
# If N is already taken, a "# ! collision" comment naming both full names
# is printed and the script exits with status 1.

function record_hash(n, name)
{
  if (used_hashes[n]) {
    printf "# ! collision: %s %s\n", used_hashes[n], name
    exit 1
  }
  used_hashes[n] = name
}

# Return a shortened rule name representing NAME,
# and record this relationship to the hash table.
# N is a local variable, not a caller-supplied argument.

function gen_rule_name(name, n)
{
  # Use a simple mnemonic: the first two letters.
  n = substr(name, 1, 2)
  record_hash(n, name)
  # printf "# %s = %s\n", n, name
  return n
}

# Fill the global rule[] table (full rule name -> abbreviation) with the
# fixed abbreviations below, then register each abbreviation with
# record_hash so that any later collision is detected.
# NAME is a local loop variable, not a caller-supplied argument.

function prehash_rule_names(name)
{
  # Rule names are not part of the tzdb API, so substitute shorter
  # ones.  Shortening them consistently from one release to the next
  # simplifies comparison of the output.  That being said, the
  # 1-letter names below are not standardized in any way, and can
  # change arbitrarily from one release to the next, as the main goal
  # here is compression not comparison.

  # Abbreviating these rule names to one letter saved the most space
  # circa 2018e.
  rule["Arg"] = "A"
  rule["Brazil"] = "B"
  rule["Canada"] = "C"
  rule["Denmark"] = "D"
  rule["EU"] = "E"
  rule["France"] = "F"
  rule["GB-Eire"] = "G"
  rule["Halifax"] = "H"
  rule["Italy"] = "I"
  rule["Jordan"] = "J"
  rule["Egypt"] = "K" # "Kemet" in ancient Egyptian
  rule["Libya"] = "L"
  rule["Morocco"] = "M"
  rule["Neth"] = "N"
  rule["Poland"] = "O" # arbitrary
  rule["Palestine"] = "P"
  rule["Cuba"] = "Q" # Its start sounds like "Q".
  rule["Russia"] = "R"
  rule["Syria"] = "S"
  rule["Turkey"] = "T"
  rule["Uruguay"] = "U"
  rule["Vincennes"] = "V"
  rule["Winn"] = "W"
  rule["Mongol"] = "X" # arbitrary
  rule["NT_YK"] = "Y"
  rule["Zion"] = "Z"
  rule["Austria"] = "a"
  rule["Belgium"] = "b"
  rule["C-Eur"] = "c"
  rule["Algeria"] = "d" # country code DZ
  rule["E-Eur"] = "e"
  rule["Taiwan"] = "f" # Formosa
  rule["Greece"] = "g"
  rule["Hungary"] = "h"
  rule["Iran"] = "i"
  rule["StJohns"] = "j"
  rule["Chatham"] = "k" # arbitrary
  rule["Lebanon"] = "l"
  rule["Mexico"] = "m"
  rule["Tunisia"] = "n" # country code TN
  rule["Moncton"] = "o" # arbitrary
  rule["Port"] = "p"
  rule["Albania"] = "q" # arbitrary
  rule["Regina"] = "r"
  rule["Spain"] = "s"
  rule["Toronto"] = "t"
  rule["US"] = "u"
  rule["Louisville"] = "v" # ville
  rule["Iceland"] = "w" # arbitrary
  rule["Chile"] = "x" # arbitrary
  rule["Para"] = "y" # country code PY
  rule["Romania"] = "z" # arbitrary
  rule["Macau"] = "_" # arbitrary

  # Use ISO 3166 alpha-2 country codes for remaining names that are countries.
  # This is more systematic, and avoids collisions (e.g., Malta and Moldova).
  rule["Armenia"] = "AM"
  rule["Aus"] = "AU"
  rule["Azer"] = "AZ"
  rule["Barb"] = "BB"
  rule["Dhaka"] = "BD"
  rule["Bulg"] = "BG"
  rule["Bahamas"] = "BS"
  rule["Belize"] = "BZ"
  rule["Swiss"] = "CH"
  rule["Cook"] = "CK"
  rule["PRC"] = "CN"
  rule["Cyprus"] = "CY"
  rule["Czech"] = "CZ"
  rule["Germany"] = "DE"
  rule["DR"] = "DO"
  rule["Ecuador"] = "EC"
  rule["Finland"] = "FI"
  rule["Fiji"] = "FJ"
  rule["Falk"] = "FK"
  rule["Ghana"] = "GH"
  rule["Guat"] = "GT"
  rule["Hond"] = "HN"
  rule["Haiti"] = "HT"
  rule["Eire"] = "IE"
  rule["Iraq"] = "IQ"
  rule["Japan"] = "JP"
  rule["Kyrgyz"] = "KG"
  rule["ROK"] = "KR"
  rule["Latvia"] = "LV"
  rule["Lux"] = "LX"
  rule["Moldova"] = "MD"
  rule["Malta"] = "MT"
  rule["Mauritius"] = "MU"
  rule["Namibia"] = "NA"
  rule["Nic"] = "NI"
  rule["Norway"] = "NO"
  rule["Peru"] = "PE"
  rule["Phil"] = "PH"
  rule["Pakistan"] = "PK"
  rule["Sudan"] = "SD"
  rule["Salv"] = "SV"
  rule["Tonga"] = "TO"
  rule["Vanuatu"] = "VU"

  # Avoid collisions.
  rule["Detroit"] = "Dt" # De = Denver

  for (name in rule) {
    record_hash(rule[name], name)
  }
}

# Process an input line and save it for later output.
#
# LINE is the raw input line.  FIELD, END, I, N and STARTDEF are local
# variables, not caller-supplied arguments.  The shrunken line is appended
# to the global output_line[] array (indexed from 0; nout counts entries).
# Some lines (SystemV rules, FooAsia and SpainAfrica rules) are dropped
# and produce no output entry.

function process_input_line(line, field, end, i, n, startdef)
{
  # Remove comments, normalize spaces, and append a space to each line.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[\t ]+/, " ", line)

  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
  # NOTE(review): the code emits "Li", so the quoted abbreviation in the
  # sentence above may have been meant to read "L" -- confirm.
  sub(/^Link /, "Li ", line)
  sub(/^Rule /, "R ", line)
  sub(/^Zone /, "Z ", line)

  # SystemV rules are not needed.
  if (line ~ /^R SystemV /) return

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.  The FooAsia Rule lines themselves are dropped;
  # on other lines only the "Asia" suffix is removed.
  if (match(line, /[^ ]Asia /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }
  # Replace SpainAfrica rules with Morocco, as they are duplicates.
  if (match(line, / SpainAfrica /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) "Morocco" substr(line, RSTART + RLENGTH - 1)
  }

  # Abbreviate times.
  # First strip leading zeros that follow a colon or a space ...
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  # ... then drop a ":0" component that is not followed by another colon.
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
  # END is the position just past the match, so the delimiter matched by
  # [ <>] sits at END - 1 and the weekday's three letters just before it.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    end = RSTART + RLENGTH
    # Keep only the first letter of the weekday.
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
    end = RSTART + RLENGTH
    # Keep the first two letters of the weekday.
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "only" and month names.
  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
  # as ambiguous.
  gsub(/ max /, " ma ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[A-Za-z] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Abbreviate rule names.  The rule name is field 4 on a "Z" line and
  # field 2 on any other line except "Li" lines, which have none.
  # Fields starting with "-", "+" or a digit are left alone.
  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
  if (i && field[i] ~ /^[^-+0-9]/) {
    if (!rule[field[i]])
      rule[field[i]] = gen_rule_name(field[i])
    field[i] = rule[field[i]]
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines.
  startdef = ""
  if (field[1] == "Z")
    zonename = startdef = field[2]
  else if (field[1] == "Li")
    zonename = startdef = field[3]
  else if (field[1] == "R")
    zonename = ""
  if (startdef) {
    i = zonedef[startdef]
    if (i) {
      # zonedef[] holds 1 + the output_line index of the definition's
      # first line.  Blank that line and every continuation line
      # (one starting with "-", "+" or a digit) that follows it.
      do {
        output_line[i - 1] = ""
      } while (output_line[i++] ~ /^[-+0-9]/)
    }
    # Remember where this definition starts.  This is done only for the
    # Zone or Link line itself: updating it on continuation lines too
    # would leave it pointing at the definition's last line, and a later
    # superseding definition would then erase only that one line.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  line = field[1]
  for (i = 2; i <= n; i++)
    line = line " " field[i]
  output_line[nout++] = line
}

# Print every saved output line that has not been blanked out.
# I is a local variable, not a caller-supplied argument.

function output_saved_lines(i)
{
  for (i = 0; i < nout; i++)
    if (output_line[i])
      print output_line[i]
}

BEGIN {
  # Files that the output normally depends on.
  default_dep["africa"] = 1
  default_dep["antarctica"] = 1
  default_dep["asia"] = 1
  default_dep["australasia"] = 1
  default_dep["backward"] = 1
  default_dep["etcetera"] = 1
  default_dep["europe"] = 1
  default_dep["factory"] = 1
  default_dep["northamerica"] = 1
  default_dep["southamerica"] = 1
  default_dep["systemv"] = 1
  default_dep["ziguard.awk"] = 1
  default_dep["zishrink.awk"] = 1

  # Output a version string from 'version' and related configuration variables
  # supported by tzdb's Makefile.  If you change the makefile or any other files
  # that affect the output of this script, you should append '-SOMETHING'
  # to the contents of 'version', where SOMETHING identifies what was changed.
  # The variables deps, version, dataform and redo are not assigned in this
  # script; presumably the caller sets them on the awk command line.

  # Build ddeps: each listed dependency that is not a default one, followed
  # by each default dependency that was not listed, the latter marked "!".
  ndeps = split(deps, dep)
  ddeps = ""
  for (i = 1; i <= ndeps; i++) {
    if (default_dep[dep[i]]) {
      default_dep[dep[i]]++
    } else {
      ddeps = ddeps " " dep[i]
    }
  }
  for (d in default_dep) {
    if (default_dep[d] == 1) {
      ddeps = ddeps " !" d
    }
  }
  print "# version", version
  if (dataform != "main") {
    print "# dataform", dataform
  }
  if (redo != "posix_right") {
    print "# redo " redo
  }
  if (ddeps) {
    print "# ddeps" ddeps
  }
  print "# This zic input file is in the public domain."

  prehash_rule_names()
}

# Process every input line that is neither blank nor a pure comment.
/^[\t ]*[^#\t ]/ {
  process_input_line($0)
}

END {
  output_saved_lines()
}