# Convert tzdata source into vanguard or rearguard form.

# Contributed by Paul Eggert.  This file is in the public domain.

# This is not a general-purpose converter; it is designed for current tzdata.
# It just converts from current source to main, vanguard, and rearguard forms.
# Although it might be nice for it to be idempotent, or to be useful
# for converting back and forth between vanguard and rearguard formats,
# it does not do these nonessential tasks now.
#
# Although main and vanguard forms are currently equivalent,
# this need not always be the case.  When the two forms differ,
# this script can convert either from main to vanguard form (needed then),
# or from vanguard to main form (this conversion would be needed later,
# after main became rearguard and vanguard became main).
# There is no need to convert rearguard to other forms.
#
# When converting to vanguard form, the output can use the line
# "Zone GMT 0 - GMT" which TZUpdater 2.3.2 mistakenly rejects.
#
# When converting to vanguard form, the output can use negative SAVE
# values.
#
# When converting to rearguard form, the output uses only nonnegative
# SAVE values.  The idea is for the output data to simulate the behavior
# of the input data as best it can within the constraints of the
# rearguard format.

# Convert FIELD, an [-]h[:mm] string such as "-0:30", to a signed count
# of minutes such as -30.  The sign is taken from the text of FIELD rather
# than from its numeric value, so that "-0:30" still comes out negative.
# Names after the backslash are locals.
function get_minutes(field, \
                     whole_hours, mins)
{
  whole_hours = +field
  mins = 0
  if (field ~ /:/) {
    # Keep what follows the first colon; it is used as a number below.
    mins = field
    sub(/[^:]*:/, "", mins)
  }
  if (field ~ /^-/)
    return 60 * whole_hours - mins
  return 60 * whole_hours + mins
}

# Convert OFFSET, a minute count such as 300 or 330, to a %z-style
# abbreviation: sign and two-digit hours ("+05") when OFFSET is a whole
# number of hours, otherwise sign and four digits hhmm ("+0530").
# Names after the backslash are locals.
function offset_abbr(offset, \
                     hh, mm)
{
  hh = int(offset / 60)
  mm = offset % 60
  if (!mm)
    return sprintf("%+.2d", hh)
  return sprintf("%+.4d", hh * 100 + mm)
}

# Round TIMESTAMP (a +-hh:mm:ss.dddd string) to the nearest second.
# round_to_second(TIMESTAMP): if TIMESTAMP has the form [+-]hh:mm:ss.dddd,
# return it as [-]h:mm:ss with the fraction rounded to the nearest second,
# an exact half going to the even second.  Any other TIMESTAMP is returned
# unchanged.  A leading "+" is not reproduced in a rounded result.
# Names after the backslash are locals.
function round_to_second(timestamp, \
                         hh, mm, ss, seconds, dot_dddd, subseconds)
{
  # Reduce a copy to its ".dddd" tail; no match means nothing to round.
  dot_dddd = timestamp
  if (!sub(/^[+-]?[0-9]+:[0-9]+:[0-9]+\./, ".", dot_dddd))
    return timestamp

  # Strip leading fields so that numeric conversion of each copy yields
  # the seconds, minutes and (unsigned) hours respectively.
  hh = mm = ss = timestamp
  sub(/^[-+]?[0-9]+:[0-9]+:/, "", ss)
  sub(/^[-+]?[0-9]+:/, "", mm)
  sub(/^[-+]?/, "", hh)

  # Count whole seconds only.  SS still carries its fraction (e.g. "44.5"),
  # and without int() the parity test below would evaluate 44.5 % 2, which
  # is never zero, so every exact half would round up instead of to even.
  seconds = 3600 * hh + 60 * mm + int(ss)
  subseconds = +dot_dddd
  seconds += 0.5 < subseconds || ((subseconds == 0.5) && (seconds % 2))

  # The sign is reattached from the text, since HH was made unsigned above.
  return sprintf("%s%d:%.2d:%.2d", timestamp ~ /^-/ ? "-" : "", \
                 seconds / 3600, seconds / 60 % 60, seconds % 60)
}

BEGIN {
  dataform_type["vanguard"] = 1
  dataform_type["main"] = 1
  dataform_type["rearguard"] = 1

  if (PACKRATLIST) {
    # NOTE(review): this copy of the file is damaged at this point.  The text
    # between "getline" and the tail of a comment ending in '=8 25:00"' is
    # missing -- apparently including the rest of this BEGIN block and the
    # header of the rule that encloses the Japan and Morocco code below.
    # The damaged fragment is kept verbatim on the next line; restore the
    # lost span from a pristine copy before running this script.
    while (getline =8 25:00"
  # to "Sun>=9 1:00", to cater to zic before 2007 and to older Java.
  if ($0 ~ /^Rule/ && $2 == "Japan") {
    if (DATAFORM == "rearguard") {
      if ($7 == "Sat>=8" && $8 == "25:00") {
        sub(/Sat>=8/, "Sun>=9")
        sub(/25:00/, " 1:00")
      }
    } else {
      if ($7 == "Sun>=9" && $8 == "1:00") {
        sub(/Sun>=9/, "Sat>=8")
        sub(/ 1:00/, "25:00")
      }
    }
  }

  # In rearguard form, change the Morocco lines with negative SAVE values
  # to use positive SAVE values.
# The statements down to the first unindented "}" continue a rule body that
# begins earlier in the file.
  if ($2 == "Morocco") {
    if ($0 ~ /^Rule/) {
      # Rule lines whose TO year is 2017 or 2018 and whose month is Oct:
      # rearguard form ends them in 2017, the other forms in 2018.
      if ($4 ~ /^201[78]$/ && $6 == "Oct") {
        if (DATAFORM == "rearguard") {
          sub(/\t2018\t/, "\t2017\t")
        } else {
          sub(/\t2017\t/, "\t2018\t")
        }
      }
      # Rule lines starting in 2019 or later: shift the SAVE value by an
      # hour so that rearguard form never shows a negative SAVE.
      if (2019 <= $3) {
        if ($8 == "2:00") {
          if (DATAFORM == "rearguard") {
            sub(/\t0\t/, "\t1:00\t")
          } else {
            sub(/\t1:00\t/, "\t0\t")
          }
        } else {
          if (DATAFORM == "rearguard") {
            sub(/\t-1:00\t/, "\t0\t")
          } else {
            sub(/\t0\t/, "\t-1:00\t")
          }
        }
      }
    }
    # Three-field lines that start with an offset: swap the standard
    # offset and the order of the two abbreviations to match.
    if ($1 ~ /^[+0-9-]/ && NF == 3) {
      if (DATAFORM == "rearguard") {
        sub(/1:00\tMorocco/, "0:00\tMorocco")
        sub(/\t\+01\/\+00$/, "\t+00/+01")
      } else {
        sub(/0:00\tMorocco/, "1:00\tMorocco")
        # Both "+" signs are escaped.  Unescaped, the second one is a
        # repetition operator applied to "/", and "+00/+01" never matches.
        sub(/\t\+00\/\+01$/, "\t+01/+00")
      }
    }
  }
}

# A Zone line decides whether the lines that follow it are ignored:
# they are when reading PACKRATDATA with a PACKRATLIST in effect that does
# not list this zone.
/^Zone/ {
  packrat_ignored = FILENAME == PACKRATDATA && PACKRATLIST && !packratlist[$2];
}

# Comment out ignored lines, except Rule lines.
{
  if (packrat_ignored && $0 !~ /^Rule/) {
    sub(/^/, "#")
  }
}

# Return a link line resulting by changing OLDLINE to link to TARGET
# from LINKNAME, instead of linking to OLDTARGET from LINKNAME.
# Align data columns the same as they were in OLDLINE.
# Also, replace any existing white space followed by comment with COMMENT.
# COMMENT may be omitted, in which case nothing is appended.
# Names after the backslash are locals.
function make_linkline(oldline, target, linkname, oldtarget, comment, \
                       oldprefix, oldprefixlen, oldtargettabs, \
                       replsuffix, targettabs)
{
  oldprefix = "Link\t" oldtarget "\t"
  oldprefixlen = length(oldprefix)
  if (substr(oldline, 1, oldprefixlen) == oldprefix) {
    # Use tab stops to preserve LINKNAME's column.
    replsuffix = substr(oldline, oldprefixlen + 1)
    sub(/[\t ]*#.*/, "", replsuffix)
    # Tab stops are taken to be 8 columns wide.
    oldtargettabs = int(length(oldtarget) / 8) + 1
    targettabs = int(length(target) / 8) + 1
    # A shorter TARGET needs extra leading tabs ...
    for (; targettabs < oldtargettabs; targettabs++) {
      replsuffix = "\t" replsuffix
    }
    # ... and a longer one gives up leading tabs, while there are any.
    for (; oldtargettabs < targettabs && replsuffix ~ /^\t/; targettabs--) {
      replsuffix = substr(replsuffix, 2)
    }
  } else {
    # Odd format line; don't bother lining up its replacement nicely.
    replsuffix = linkname
  }
  return "Link\t" target "\t" replsuffix comment
}

# In vanguard form, a Link line annotated with "#= X" is rewritten to link
# to X, and the annotation is dropped.
/^Link/ && $4 == "#=" && DATAFORM == "vanguard" {
  $0 = make_linkline($0, $5, $3, $2)
}

# If a Link line is followed by a Link or Zone line for the same data, comment
# out the Link line.  This can happen if backzone overrides a Link
# with a Zone or a different Link.
/^Zone/ {
  sub(/^Link/, "#Link", line[linkline[$2]])
}
/^Link/ {
  sub(/^Link/, "#Link", line[linkline[$3]])
  linkline[$3] = NR
  linktarget[$3] = $2
}

# Buffer every line, as edited so far, for output in END.
{ line[NR] = $0 }

# For each link whose target is itself a link name, rewrite its buffered
# line to point at the end of the chain and record the old target in a
# trailing "#= TARGET" annotation.
# Names after the backslash are locals.
function cut_link_chains_short( \
                               l, linkname, t, target)
{
  for (linkname in linktarget) {
    target = linktarget[linkname]
    # Test membership with "in" rather than by subscripting: subscripting
    # an absent key would add an element to linktarget during this loop.
    if (target in linktarget) {
      # TARGET is itself a link name.  Replace the line "Link TARGET LINKNAME"
      # with "Link T LINKNAME #= TARGET", where T is at the end of the chain
      # of links that LINKNAME points to.
      t = linktarget[target]
      while (t in linktarget) {
        t = linktarget[t]
      }
      l = linkline[linkname]
      line[l] = make_linkline(line[l], t, linkname, target, "\t#= " target)
    }
  }
}

END {
  if (DATAFORM != "vanguard") {
    cut_link_chains_short()
  }
  for (i = 1; i <= NR; i++)
    print line[i]
}