# Convert tzdata source into a smaller version of itself.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# 'zic' should treat this script's output as if it were identical to
# this script's input.

# Global state used by this script:
#   n_rule_names  number of abbreviated rule names generated so far
#   rule[]        maps an original rule name to its abbreviated name
#   output_line[] saved output lines, indexed 0 .. nout-1
#   nout          number of saved output lines
#   zonedef[]     maps a Zone or Link name to 1 + the output_line index
#                 of the first line of its most recent definition
#   version       printed in the header; not set by this script, so it
#                 must be supplied by the invoker (e.g. 'awk -v version=...')


# Return a new rule name.
# N_RULE_NAMES keeps track of how many rule names have been generated.
# Names are built from ALPHABET: the first length(ALPHABET) calls return
# one-character names, and later calls return longer names.
# The parameters are awk-style local variables; callers pass no arguments.

function gen_rule_name(alphabet, base, rule_name, n, digit)
{
  alphabet = ""
  alphabet = alphabet "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  alphabet = alphabet "abcdefghijklmnopqrstuvwxyz"
  alphabet = alphabet "!$%&'()*+,./:;<=>?@[\\]^_`{|}~"
  base = length(alphabet)
  rule_name = ""
  n = n_rule_names++

  do {
    # Once at least one character has been produced, a remaining
    # value in 1..BASE is shifted down by one so that the leading
    # character can also be the first letter of ALPHABET.
    if (rule_name != "" && n <= base)
      n--
    digit = n % base
    rule_name = substr(alphabet, digit + 1, 1) rule_name
    n = (n - digit) / base
  } while (n);

  return rule_name
}

# Process an input line and save it for later output.
# LINE is the raw input line; the remaining parameters are local variables.
# The abbreviated line is appended to output_line[].  Nothing is saved
# for SystemV rules or for rules whose line contains "<something>Asia ".

function process_input_line(line, field, end, i, n, startdef)
{
  # Remove comments, normalize spaces, and append a space to each line.
  sub(/#.*/, "", line)
  line = line " "
  gsub(/[\t ]+/, " ", line)

  # Abbreviate keywords.  Do not abbreviate "Link" to just "L",
  # as pre-2017c zic erroneously diagnoses "Li" as ambiguous.
  sub(/^Link /, "Li ", line)
  sub(/^Rule /, "R ", line)
  sub(/^Zone /, "Z ", line)

  # SystemV rules are not needed.
  if (line ~ /^R SystemV /) return

  # Replace FooAsia rules with the same rules without "Asia", as they
  # are duplicates.
  if (match(line, /[^ ]Asia /)) {
    if (line ~ /^R /) return
    line = substr(line, 1, RSTART) substr(line, RSTART + 5)
  }

  # Abbreviate times: strip leading zeros from each component, then
  # drop a trailing ":0" component.
  while (match(line, /[: ]0+[0-9]/))
    line = substr(line, 1, RSTART) substr(line, RSTART + RLENGTH - 1)
  while (match(line, /:0[^:]/))
    line = substr(line, 1, RSTART - 1) substr(line, RSTART + 2)

  # Abbreviate weekday names.  Do not abbreviate "Sun" and "Sat", as
  # pre-2017c zic erroneously diagnoses "Su" and "Sa" as ambiguous.
  # Mon, Wed and Fri shrink to one letter; Tue and Thu to two letters.
  while (match(line, / (last)?(Mon|Wed|Fri)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 4) substr(line, end - 1)
  }
  while (match(line, / (last)?(Tue|Thu)[ <>]/)) {
    end = RSTART + RLENGTH
    line = substr(line, 1, end - 3) substr(line, end - 1)
  }

  # Abbreviate "max", "only" and month names.
  # Do not abbreviate "min", as pre-2017c zic erroneously diagnoses "mi"
  # as ambiguous.
  gsub(/ max /, " ma ", line)
  gsub(/ only /, " o ", line)
  gsub(/ Jan /, " Ja ", line)
  gsub(/ Feb /, " F ", line)
  gsub(/ Apr /, " Ap ", line)
  gsub(/ Aug /, " Au ", line)
  gsub(/ Sep /, " S ", line)
  gsub(/ Oct /, " O ", line)
  gsub(/ Nov /, " N ", line)
  gsub(/ Dec /, " D ", line)

  # Strip leading and trailing space.
  sub(/^ /, "", line)
  sub(/ $/, "", line)

  # Remove unnecessary trailing zero fields.
  sub(/ 0+$/, "", line)

  # Remove unnecessary trailing days-of-month "1".
  if (match(line, /[A-Za-z] 1$/))
    line = substr(line, 1, RSTART)

  # Remove unnecessary trailing " Ja" (for January).
  sub(/ Ja$/, "", line)

  n = split(line, field)

  # Abbreviate rule names.  The rule name is field 4 of a "Z" line and
  # field 2 of any other line except "Li", which has no rule name.
  # A field starting with "-", "+" or a digit is not a rule name.
  i = field[1] == "Z" ? 4 : field[1] == "Li" ? 0 : 2
  if (i && field[i] ~ /^[^-+0-9]/) {
    if (!rule[field[i]])
      rule[field[i]] = gen_rule_name()
    field[i] = rule[field[i]]
  }

  # If this zone supersedes an earlier one, delete the earlier one
  # from the saved output lines.
  startdef = ""
  if (field[1] == "Z")
    startdef = field[2]
  else if (field[1] == "Li")
    startdef = field[3]
  if (startdef != "") {
    i = zonedef[startdef]
    if (i) {
      # Blank the earlier definition's first line and then each
      # continuation line (which starts with "-", "+" or a digit).
      do
	output_line[i - 1] = ""
      while (output_line[i++] ~ /^[-+0-9]/);
    }
    # Record where this definition starts.  This must be done only on
    # the line that starts a definition: recording it on continuation
    # lines too would leave zonedef pointing at the definition's last
    # line, so a later superseding definition would delete only that
    # last line and leave the rest of the stale zone in the output.
    zonedef[startdef] = nout + 1
  }

  # Save the line for later output.
  line = field[1]
  for (i = 2; i <= n; i++)
    line = line " " field[i]
  output_line[nout++] = line
}

# Print every saved output line that has not been blanked out.
# I is a local variable; callers pass no arguments.

function output_saved_lines(i)
{
  for (i = 0; i < nout; i++)
    if (output_line[i])
      print output_line[i]
}

# Emit the header before any input is read.
BEGIN {
  print "# version", version
  print "# This zic input file is in the public domain."
}

# Process each input line whose first non-blank character is not "#";
# blank lines and comment-only lines are skipped.
/^[\t ]*[^#\t ]/ {
  process_input_line($0)
}

# All lines are buffered until end of input, because a later definition
# can blank out earlier saved lines.
END {
  output_saved_lines()
}