xref: /freebsd/contrib/llvm-project/libcxx/src/experimental/time_zone.cpp (revision e3f4a63af63bea70bc86b6c790b14aa5ee99fcd0)
1 //===----------------------------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 // For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html
10 
11 // TODO TZDB look at optimizations
12 //
13 // The current algorithm is correct but not efficient. For example, in a named
14 // rule based continuation finding the next rule does quite a bit of work,
15 // returns the next rule and "forgets" its state. This could be better.
16 //
17 // It would be possible to cache lookups. If a time for a zone is calculated its
18 // sys_info could be kept and the next lookup could test whether the time is in
19 // a "known" sys_info. The wording in the Standard hints at this slowness by
20 // "suggesting" this could be implemented on the user's side.
21 
22 // TODO TZDB look at removing quirks
23 //
24 // The code has some special rules to adjust the timing at the continuation
25 // switches. This works correctly, but some of the places feel odd. It would be
26 // good to investigate this further and see whether all quirks are needed or
27 // that there are better fixes.
28 //
29 // These quirks often use a 12h interval; this is the scan interval of zdump,
30 // which implies there are no sys_info objects with a duration of less than 12h.
31 
32 // Work around https://gcc.gnu.org/bugzilla/show_bug.cgi?id=120502
33 
34 #include <__config>
35 
36 // TODO(LLVM 23): When upgrading to GCC 16 this can be removed
37 #ifdef _LIBCPP_COMPILER_GCC
38 #  pragma GCC optimize("-O0")
39 #endif
40 
41 #include <algorithm>
42 #include <cctype>
43 #include <chrono>
44 #include <expected>
45 #include <map>
46 #include <numeric>
47 #include <ranges>
48 
49 #include "include/tzdb/time_zone_private.h"
50 #include "include/tzdb/tzdb_list_private.h"
51 
52 // TODO TZDB remove debug printing
53 #ifdef PRINT
54 #  include <print>
55 #endif
56 
57 _LIBCPP_BEGIN_NAMESPACE_STD
58 
59 #ifdef PRINT
// Debug-only formatter for chrono::sys_info; only compiled when PRINT is
// defined. The output has the shape "[begin, end) offset save abbrev".
template <>
struct formatter<chrono::sys_info, char> {
  // No format specification is parsed; the parse position is returned as-is.
  template <class _ParseCtx>
  constexpr typename _ParseCtx::iterator parse(_ParseCtx& __parse_ctx) {
    return __parse_ctx.begin();
  }

  // Writes the five fields of __value to the output of __format_ctx.
  template <class _FormatCtx>
  typename _FormatCtx::iterator format(const chrono::sys_info& __value, _FormatCtx& __format_ctx) const {
    auto __out = __format_ctx.out();
    return std::format_to(
        std::move(__out),
        "[{}, {}) {:%Q%q} {:%Q%q} {}",
        __value.begin,
        __value.end,
        __value.offset,
        __value.save,
        __value.abbrev);
  }
};
73 #endif
74 
75 namespace chrono {
76 
77 //===----------------------------------------------------------------------===//
78 //                           Details
79 //===----------------------------------------------------------------------===//
80 
// A sys_info object together with the information whether it may be merged
// with the sys_info object of the next result.
struct __sys_info {
  sys_info __info;
  // Can __info be merged with the sys_info object of the next result?
  bool __can_merge;
};

// Return type of the helper functions that look up a sys_info.
// - The expected value holds the "best" sys_info object. This object can be
//   before the requested time. sys_info objects of different continuations
//   sometimes share their offset, save, and abbrev; such objects are merged
//   into one sys_info object. The __can_merge flag tells whether the current
//   result can be merged with the next result.
// - The unexpected value means no sys_info object was found; it holds the
//   time to be used for the next search iteration.
using __sys_info_result = expected<__sys_info, sys_seconds>;
95 
96 template <ranges::forward_range _Range,
97           class _Type,
98           class _Proj                                                                                  = identity,
99           indirect_strict_weak_order<const _Type*, projected<ranges::iterator_t<_Range>, _Proj>> _Comp = ranges::less>
100 [[nodiscard]] static ranges::borrowed_iterator_t<_Range>
101 __binary_find(_Range&& __r, const _Type& __value, _Comp __comp = {}, _Proj __proj = {}) {
102   auto __end = ranges::end(__r);
103   auto __ret = ranges::lower_bound(ranges::begin(__r), __end, __value, __comp, __proj);
104   if (__ret == __end)
105     return __end;
106 
107   // When the value does not match the predicate it's equal and a valid result
108   // was found.
109   return !std::invoke(__comp, __value, std::invoke(__proj, *__ret)) ? __ret : __end;
110 }
111 
112 // Format based on https://data.iana.org/time-zones/tz-how-to.html
113 //
114 // 1  a time zone abbreviation that is a string of three or more characters that
115 //    are either ASCII alphanumerics, "+", or "-"
116 // 2  the string "%z", in which case the "%z" will be replaced by a numeric time
117 //    zone abbreviation
118 // 3  a pair of time zone abbreviations separated by a slash ('/'), in which
119 //    case the first string is the abbreviation for the standard time name and
120 //    the second string is the abbreviation for the daylight saving time name
121 // 4  a string containing "%s", in which case the "%s" will be replaced by the
122 //    text in the appropriate Rule's LETTER column, and the resulting string
123 //    should be a time zone abbreviation
124 //
125 // Rule 1 is not strictly validated since America/Barbados uses a two letter
126 // abbreviation AT.
// Expands the FORMAT field of __continuation to a time zone abbreviation.
//
// - __letters replaces every "%s" in the FORMAT field.
// - __save selects the part of a "STD/DST" pair (STD when __save is zero,
//   DST otherwise) and is added to the STDOFF of the continuation for "%z".
//
// Throws a runtime_error when the FORMAT field is malformed or when the
// resulting abbreviation is empty.
[[nodiscard]] static string
__format(const __tz::__continuation& __continuation, const string& __letters, seconds __save) {
  bool __shift = false; // true when the previous character was a '%'
  string __result;
  for (char __c : __continuation.__format) {
    if (__shift) {
      switch (__c) {
      case 's':
        __result += __letters;
        break;

      case 'z': {
        if (__continuation.__format.size() != 2)
          std::__throw_runtime_error(
              std::format("corrupt tzdb FORMAT field: %z should be the entire contents, instead contains '{}'",
                          __continuation.__format)
                  .c_str());
        chrono::hh_mm_ss __offset{__continuation.__stdoff + __save};
        if (__offset.is_negative()) {
          __result += '-';
          __offset = chrono::hh_mm_ss{-(__continuation.__stdoff + __save)};
        } else
          __result += '+';

        // The minutes are only part of the abbreviation when they are not zero.
        if (__offset.minutes() != 0min)
          std::format_to(std::back_inserter(__result), "{:%H%M}", __offset);
        else
          std::format_to(std::back_inserter(__result), "{:%H}", __offset);
      } break;

      default:
        std::__throw_runtime_error(
            std::format("corrupt tzdb FORMAT field: invalid sequence '%{}' found, expected %s or %z", __c).c_str());
      }
      __shift = false;

    } else if (__c == '/') {
      // "STD/DST": with a SAVE the part collected so far (STD) is dropped and
      // the DST part is collected, without a SAVE the STD part is complete.
      if (__save != 0s)
        __result.clear();
      else
        break;

    } else if (__c == '%') {
      __shift = true;
    } else if (__c == '+' || __c == '-' || std::isalnum(static_cast<unsigned char>(__c))) {
      // The cast is required: passing a negative char value to isalnum is
      // undefined behaviour, which can happen for non-ASCII bytes in a
      // corrupt database.
      __result.push_back(__c);
    } else {
      std::__throw_runtime_error(
          std::format(
              "corrupt tzdb FORMAT field: invalid character '{}' found, expected +, -, or an alphanumeric value", __c)
              .c_str());
    }
  }

  if (__shift)
    std::__throw_runtime_error("corrupt tzdb FORMAT field: input ended with the start of the escape sequence '%'");

  if (__result.empty())
    std::__throw_runtime_error("corrupt tzdb FORMAT field: result is empty");

  return __result;
}
189 
190 [[nodiscard]] static sys_seconds __to_sys_seconds(year_month_day __ymd, seconds __seconds) {
191   seconds __result = static_cast<sys_days>(__ymd).time_since_epoch() + __seconds;
192   return sys_seconds{__result};
193 }
194 
195 [[nodiscard]] static seconds __at_to_sys_seconds(const __tz::__continuation& __continuation) {
196   switch (__continuation.__at.__clock) {
197   case __tz::__clock::__local:
198     return __continuation.__at.__time - __continuation.__stdoff -
199            std::visit(
200                [](const auto& __value) {
201                  using _Tp = decay_t<decltype(__value)>;
202                  if constexpr (same_as<_Tp, monostate>)
203                    return chrono::seconds{0};
204                  else if constexpr (same_as<_Tp, __tz::__save>)
205                    return chrono::duration_cast<seconds>(__value.__time);
206                  else if constexpr (same_as<_Tp, std::string>)
207                    // For a named rule based continuation the SAVE depends on the RULE
208                    // active at the end. This should be determined separately.
209                    return chrono::seconds{0};
210                  else
211                    static_assert(false);
212 
213                  std::__libcpp_unreachable();
214                },
215                __continuation.__rules);
216 
217   case __tz::__clock::__universal:
218     return __continuation.__at.__time;
219 
220   case __tz::__clock::__standard:
221     return __continuation.__at.__time - __continuation.__stdoff;
222   }
223   std::__libcpp_unreachable();
224 }
225 
226 [[nodiscard]] static year_month_day __to_year_month_day(year __year, month __month, __tz::__on __on) {
227   return std::visit(
228       [&](const auto& __value) {
229         using _Tp = decay_t<decltype(__value)>;
230         if constexpr (same_as<_Tp, chrono::day>)
231           return year_month_day{__year, __month, __value};
232         else if constexpr (same_as<_Tp, weekday_last>)
233           return year_month_day{static_cast<sys_days>(year_month_weekday_last{__year, __month, __value})};
234         else if constexpr (same_as<_Tp, __tz::__constrained_weekday>)
235           return __value(__year, __month);
236         else
237           static_assert(false);
238 
239         std::__libcpp_unreachable();
240       },
241       __on);
242 }
243 
244 [[nodiscard]] static sys_seconds __until_to_sys_seconds(const __tz::__continuation& __continuation) {
245   // Does UNTIL contain the magic value for the last continuation?
246   if (__continuation.__year == chrono::year::min())
247     return sys_seconds::max();
248 
249   year_month_day __ymd = chrono::__to_year_month_day(__continuation.__year, __continuation.__in, __continuation.__on);
250   return chrono::__to_sys_seconds(__ymd, chrono::__at_to_sys_seconds(__continuation));
251 }
252 
253 // Holds the UNTIL time for a continuation with a named rule.
254 //
255 // Unlike continuations with an fixed SAVE named rules have a variable SAVE.
256 // This means when the UNTIL uses the local wall time the actual UNTIL value can
257 // only be determined when the SAVE is known. This class holds that abstraction.
258 class __named_rule_until {
259 public:
260   explicit __named_rule_until(const __tz::__continuation& __continuation)
261       : __until_{chrono::__until_to_sys_seconds(__continuation)},
262         __needs_adjustment_{
263             // The last continuation of a ZONE has no UNTIL which basically is
264             // until the end of _local_ time. This value is expressed by
265             // sys_seconds::max(). Subtracting the SAVE leaves large value.
266             // However SAVE can be negative, which would add a value to maximum
267             // leading to undefined behaviour. In practice this often results in
268             // an overflow to a very small value.
269             __until_ != sys_seconds::max() && __continuation.__at.__clock == __tz::__clock::__local} {}
270 
271   // Gives the unadjusted until value, this is useful when the SAVE is not known
272   // at all.
273   sys_seconds __until() const noexcept { return __until_; }
274 
275   bool __needs_adjustment() const noexcept { return __needs_adjustment_; }
276 
277   // Returns the UNTIL adjusted for SAVE.
278   sys_seconds operator()(seconds __save) const noexcept { return __until_ - __needs_adjustment_ * __save; }
279 
280 private:
281   sys_seconds __until_;
282   bool __needs_adjustment_;
283 };
284 
285 [[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, const __tz::__rule& __rule) {
286   switch (__rule.__at.__clock) {
287   case __tz::__clock::__local:
288     // Local time and standard time behave the same. This is not
289     // correct. Local time needs to adjust for the current saved time.
290     // To know the saved time the rules need to be known and sorted.
291     // This needs a time so to avoid the chicken and egg adjust the
292     // saving of the local time later.
293     return __rule.__at.__time - __stdoff;
294 
295   case __tz::__clock::__universal:
296     return __rule.__at.__time;
297 
298   case __tz::__clock::__standard:
299     return __rule.__at.__time - __stdoff;
300   }
301   std::__libcpp_unreachable();
302 }
303 
304 [[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule, year __year) {
305   year_month_day __ymd = chrono::__to_year_month_day(__year, __rule.__in, __rule.__on);
306 
307   seconds __at = chrono::__at_to_seconds(__stdoff, __rule);
308   return chrono::__to_sys_seconds(__ymd, __at);
309 }
310 
311 [[nodiscard]] static sys_seconds __from_to_sys_seconds(seconds __stdoff, const __tz::__rule& __rule) {
312   return chrono::__from_to_sys_seconds(__stdoff, __rule, __rule.__from);
313 }
314 
315 [[nodiscard]] static const vector<__tz::__rule>&
316 __get_rules(const __tz::__rules_storage_type& __rules_db, const string& __rule_name) {
317   auto __result = chrono::__binary_find(__rules_db, __rule_name, {}, [](const auto& __p) { return __p.first; });
318   if (__result == std::end(__rules_db))
319     std::__throw_runtime_error(("corrupt tzdb: rule '" + __rule_name + " 'does not exist").c_str());
320 
321   return __result->second;
322 }
323 
324 // Returns the letters field for a time before the first rule.
325 //
326 // Per https://data.iana.org/time-zones/tz-how-to.html
327 // One wrinkle, not fully explained in zic.8.txt, is what happens when switching
328 // to a named rule. To what values should the SAVE and LETTER data be
329 // initialized?
330 //
331 // 1 If at least one transition has happened, use the SAVE and LETTER data from
332 //   the most recent.
333 // 2 If switching to a named rule before any transition has happened, assume
334 //   standard time (SAVE zero), and use the LETTER data from the earliest
335 //   transition with a SAVE of zero.
336 //
337 // This function implements case 2.
338 [[nodiscard]] static string __letters_before_first_rule(const vector<__tz::__rule>& __rules) {
339   auto __letters =
340       __rules                                                                                //
341       | views::filter([](const __tz::__rule& __rule) { return __rule.__save.__time == 0s; }) //
342       | views::transform([](const __tz::__rule& __rule) { return __rule.__letters; })        //
343       | views::take(1);
344 
345   if (__letters.empty())
346     std::__throw_runtime_error("corrupt tzdb: rule has zero entries");
347 
348   return __letters.front();
349 }
350 
351 // Determines the information based on the continuation and the rules.
352 //
353 // There are several special cases to take into account
354 //
355 // === Entries before the first rule becomes active ===
356 // Asia/Hong_Kong
357 //   9 - JST 1945 N 18 2        // (1)
358 //   8 HK HK%sT                 // (2)
359 //   R HK 1946 o - Ap 21 0 1 S  // (3)
// There (1) is active until November 18th 1945 at 02:00, after this time
// (2) becomes active. The first rule entry for HK (3) becomes active
// from April 21st 1946 at 01:00. In the period between (2) is active.
363 // This entry has an offset.
364 // This entry has no save, letters, or dst flag. So in the period
365 // after (1) and until (3) no rule entry is associated with the time.
366 
367 [[nodiscard]] static sys_info __get_sys_info_before_first_rule(
368     sys_seconds __begin,
369     sys_seconds __end,
370     const __tz::__continuation& __continuation,
371     const vector<__tz::__rule>& __rules) {
372   return sys_info{
373       __begin,
374       __end,
375       __continuation.__stdoff,
376       chrono::minutes(0),
377       chrono::__format(__continuation, __letters_before_first_rule(__rules), 0s)};
378 }
379 
380 // Returns the sys_info object for a time before the first rule.
381 // When this first rule has a SAVE of 0s the sys_info for the time before the
382 // first rule and for the first rule are identical and will be merged.
383 [[nodiscard]] static sys_info __get_sys_info_before_first_rule(
384     sys_seconds __begin,
385     sys_seconds __rule_end, // The end used when SAVE != 0s
386     sys_seconds __next_end, // The end used when SAVE == 0s the times are merged
387     const __tz::__continuation& __continuation,
388     const vector<__tz::__rule>& __rules,
389     vector<__tz::__rule>::const_iterator __rule) {
390   if (__rule->__save.__time != 0s)
391     return __get_sys_info_before_first_rule(__begin, __rule_end, __continuation, __rules);
392 
393   return sys_info{
394       __begin, __next_end, __continuation.__stdoff, 0min, chrono::__format(__continuation, __rule->__letters, 0s)};
395 }
396 
397 [[nodiscard]] static seconds __at_to_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule) {
398   switch (__rule.__at.__clock) {
399   case __tz::__clock::__local:
400     return __rule.__at.__time - __stdoff - __save;
401 
402   case __tz::__clock::__universal:
403     return __rule.__at.__time;
404 
405   case __tz::__clock::__standard:
406     return __rule.__at.__time - __stdoff;
407   }
408   std::__libcpp_unreachable();
409 }
410 
411 [[nodiscard]] static sys_seconds
412 __rule_to_sys_seconds(seconds __stdoff, seconds __save, const __tz::__rule& __rule, year __year) {
413   year_month_day __ymd = chrono::__to_year_month_day(__year, __rule.__in, __rule.__on);
414 
415   seconds __at = chrono::__at_to_seconds(__stdoff, __save, __rule);
416   return chrono::__to_sys_seconds(__ymd, __at);
417 }
418 
419 // Returns the first rule after __time.
420 // Note that a rule can be "active" in multiple years, this may result in an
421 // infinite loop where the same rule is returned every time, use __current to
422 // guard against that.
423 //
424 // When no next rule exists the returned time will be sys_seconds::max(). This
425 // can happen in practice. For example,
426 //
427 //   R So 1945 o - May 24 2 2 M
428 //   R So 1945 o - S 24 3 1 S
429 //   R So 1945 o - N 18 2s 0 -
430 //
431 // Has 3 rules that are all only active in 1945.
432 [[nodiscard]] static pair<sys_seconds, vector<__tz::__rule>::const_iterator>
433 __next_rule(sys_seconds __time,
434             seconds __stdoff,
435             seconds __save,
436             const vector<__tz::__rule>& __rules,
437             vector<__tz::__rule>::const_iterator __current) {
438   year __year = year_month_day{chrono::floor<days>(__time)}.year();
439 
440   // Note it would probably be better to store the pairs in a vector and then
441   // use min() to get the smallest element
442   map<sys_seconds, vector<__tz::__rule>::const_iterator> __candidates;
443   // Note this evaluates all rules which is a waste of effort; when the entries
444   // are beyond the current year's "next year" (where "next year" is not always
445   // year + 1) the algorithm should end.
446   for (auto __it = __rules.begin(); __it != __rules.end(); ++__it) {
447     for (year __y = __it->__from; __y <= __it->__to; ++__y) {
448       // Adding the current entry for the current year may lead to infinite
449       // loops due to the SAVE adjustment. Skip these entries.
450       if (__y == __year && __it == __current)
451         continue;
452 
453       sys_seconds __t = chrono::__rule_to_sys_seconds(__stdoff, __save, *__it, __y);
454       if (__t <= __time)
455         continue;
456 
457       _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(!__candidates.contains(__t), "duplicated rule");
458       __candidates[__t] = __it;
459       break;
460     }
461   }
462 
463   if (!__candidates.empty()) [[likely]] {
464     auto __it = __candidates.begin();
465 
466     // When no rule is selected the time before the first rule and the first rule
467     // should not be merged.
468     if (__time == sys_seconds::min())
469       return *__it;
470 
471     // There can be two constitutive rules that are the same. For example,
472     // Hong Kong
473     //
474     // R HK 1973 o - D 30 3:30 1 S          (R1)
475     // R HK 1965 1976 - Ap Su>=16 3:30 1 S  (R2)
476     //
477     // 1973-12-29 19:30:00 R1 becomes active.
478     // 1974-04-20 18:30:00 R2 becomes active.
479     // Both rules have a SAVE of 1 hour and LETTERS are S for both of them.
480     while (__it != __candidates.end()) {
481       if (__current->__save.__time != __it->second->__save.__time || __current->__letters != __it->second->__letters)
482         return *__it;
483 
484       ++__it;
485     }
486   }
487 
488   return {sys_seconds::max(), __rules.end()};
489 }
490 
491 // Returns the first rule of a set of rules.
492 // This is not always the first of the listed rules. For example
493 //   R Sa 2008 2009 - Mar Su>=8 0 0 -
494 //   R Sa 2007 2008 - O Su>=8 0 1 -
495 // The transition in October 2007 happens before the transition in March 2008.
496 [[nodiscard]] static vector<__tz::__rule>::const_iterator
497 __first_rule(seconds __stdoff, const vector<__tz::__rule>& __rules) {
498   return chrono::__next_rule(sys_seconds::min(), __stdoff, 0s, __rules, __rules.end()).second;
499 }
500 
[[nodiscard]] static __sys_info_result __get_sys_info_rule(
    sys_seconds __time,
    sys_seconds __continuation_begin,
    const __tz::__continuation& __continuation,
    const vector<__tz::__rule>& __rules) {
  auto __rule = chrono::__first_rule(__continuation.__stdoff, __rules);
  _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(__rule != __rules.end(), "the set of rules has no first rule");

  // Creates the sys_info for the range [__info_begin, __info_end) using the
  // SAVE and LETTERS of the rule __rule refers to at the time of the call.
  const auto __make_info = [&](sys_seconds __info_begin, sys_seconds __info_end) {
    const seconds __rule_save = __rule->__save.__time;
    return sys_info{
        __info_begin,
        __info_end,
        __continuation.__stdoff + __rule_save,
        chrono::duration_cast<minutes>(__rule_save),
        chrono::__format(__continuation, __rule->__letters, __rule_save)};
  };

  // Avoid selecting a time before the start of the continuation
  __time = std::max(__time, __continuation_begin);

  sys_seconds __rule_begin = chrono::__from_to_sys_seconds(__continuation.__stdoff, *__rule);

  // The time sought is very likely inside the current rule.
  // When the continuation's UNTIL uses the local clock there are edge cases
  // where this is not true.
  //
  // Start to walk the rules to find the proper one.
  //
  // For now we just walk all the rules TODO TZDB investigate whether a smarter
  // algorithm would work.
  auto __next = chrono::__next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);

  // The SAVE of the first rule; only used before the rules are walked.
  const seconds __first_save = __rule->__save.__time;

  // Ignore small steps, this happens with America/Punta_Arenas for the
  // transition
  // -4:42:46 - SMT 1927 S
  // -5 x -05/-04 1932 S
  // ...
  //
  // R x 1927 1931 - S 1 0 1 -
  // R x 1928 1932 - Ap 1 0 0 -
  //
  // America/Punta_Arenas  Thu Sep  1 04:42:45 1927 UT = Thu Sep  1 00:42:45 1927 -04 isdst=1 gmtoff=-14400
  // America/Punta_Arenas  Sun Apr  1 03:59:59 1928 UT = Sat Mar 31 23:59:59 1928 -04 isdst=1 gmtoff=-14400
  // America/Punta_Arenas  Sun Apr  1 04:00:00 1928 UT = Sat Mar 31 23:00:00 1928 -05 isdst=0 gmtoff=-18000
  //
  // Without this there will be a transition
  //   [1927-09-01 04:42:45, 1927-09-01 05:00:00) -05:00:00 0min -05

  // A first rule without a SAVE is identical to the period before it, in that
  // case the period before the first rule lasts until the next rule.
  const sys_seconds __first_begin = __first_save != 0s ? __rule_begin : __next.first;
  if (__time < __first_begin) {
    // The subtraction is only evaluated when __continuation_begin is not
    // sys_seconds::min().
    if (__continuation_begin == sys_seconds::min() || __first_begin - __continuation_begin > 12h)
      return __sys_info{__get_sys_info_before_first_rule(
                            __continuation_begin, __rule_begin, __next.first, __continuation, __rules, __rule),
                        false};

    // Europe/Berlin
    // 1 c CE%sT 1945 May 24 2          (C1)
    // 1 So CE%sT 1946                  (C2)
    //
    // R c 1944 1945 - Ap M>=1 2s 1 S   (R1)
    //
    // R So 1945 o - May 24 2 2 M       (R2)
    //
    // When C2 becomes active the time would be before the first rule R2,
    // giving a 1 hour sys_info. Instead the first rule is used from the
    // start of the continuation.
    const __named_rule_until __until{__continuation};
    const sys_seconds __limit    = __until(__first_save);
    const sys_seconds __info_end = std::min(__limit, __next.first);

    return __sys_info{__make_info(__continuation_begin, __info_end), __info_end == __limit};
  }

  // The first rule is used from the start of the continuation until the next
  // rule when
  // - the first rule has no SAVE (the original note for this case refers to
  //   America/Asuncion), or
  // - the continuation starts at most SAVE before the first rule; another fix
  //   for America/Punta_Arenas when not at the start of the sys_info object.
  if (__time < __next.first && (__first_save == 0s || __continuation_begin >= __rule_begin - __first_save))
    return __sys_info{__make_info(__continuation_begin, __next.first), false};

  const __named_rule_until __continuation_end{__continuation};
  while (__next.second != __rules.end()) {
#ifdef PRINT
    std::print(
        stderr,
        "Rule for {}: [{}, {}) off={} save={} duration={}\n",
        __time,
        __rule_begin,
        __next.first,
        __continuation.__stdoff,
        __rule->__save.__time,
        __next.first - __rule_begin);
#endif

    sys_seconds __end = __continuation_end(__rule->__save.__time);

    const sys_seconds __window_begin = std::max(__continuation_begin, __rule_begin);
    if (chrono::abs(std::min(__end, __next.first) - __window_begin) < 12h) {
      // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
      // -4:16:48 - CMT 1920 May
      // -4 - -04 1930 D
      // -4 A -04/-03 1969 O 5
      // -3 A -03/-02 1999 O 3
      // -4 A -04/-03 2000 Mar 3
      // ...
      //
      // ...
      // R A 1989 1992 - O Su>=15 0 1 -
      // R A 1999 o - O Su>=1 0 1 -
      // R A 2000 o - Mar 3 0 0 -
      // R A 2007 o - D 30 0 1 -
      // ...

      // The 1999 switch uses the same rule, but with a different stdoff.
      //   R A 1999 o - O Su>=1 0 1 -
      //     stdoff -3 -> 1999-10-03 03:00:00
      //     stdoff -4 -> 1999-10-03 04:00:00
      // This generates an invalid entry and this is evaluated as a transition.
      // Looking at the zdump like output in libc++ this generates jumps in
      // the UTC time.
      //
      // Such a short range is skipped by continuing with the next rule; the
      // begin of the range is not changed.
      __rule = __next.second;
      __next = __next_rule(__next.first, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
      __end  = __continuation_end(__rule->__save.__time);
    }

    const bool __time_in_rule = __time >= __rule_begin && __time < __next.first;
    if (__time_in_rule || __next.first >= __end) {
      const sys_seconds __window_end = std::min(__end, __next.first);
      // The result can be merged when it lasts until the end of the
      // continuation.
      return __sys_info{__make_info(__window_begin, __window_end), __window_end == __end};
    }

    // Not found yet, continue with the next rule.
    __rule_begin = __next.first;
    __rule       = __next.second;
    __next       = __next_rule(__rule_begin, __continuation.__stdoff, __rule->__save.__time, __rules, __rule);
  }

  // There is no next rule; the last rule found lasts until the end of the
  // continuation.
  return __sys_info{
      __make_info(std::max(__continuation_begin, __rule_begin), __continuation_end(__rule->__save.__time)), true};
}
671 
[[nodiscard]] static __sys_info_result __get_sys_info_basic(
    sys_seconds __time, sys_seconds __continuation_begin, const __tz::__continuation& __continuation, seconds __save) {
  // The SAVE is fixed, so one sys_info object covers the entire continuation;
  // __time is not needed to select it. The result lasts until the end of the
  // continuation and can therefore always be merged.
  const sys_seconds __until = chrono::__until_to_sys_seconds(__continuation);
  sys_info __info{
      __continuation_begin,
      __until,
      __continuation.__stdoff + __save,
      chrono::duration_cast<minutes>(__save),
      chrono::__format(__continuation, __continuation.__format, __save)};
  return __sys_info{std::move(__info), true};
}
683 
[[nodiscard]] static __sys_info_result
__get_sys_info(sys_seconds __time,
               sys_seconds __continuation_begin,
               const __tz::__continuation& __continuation,
               const __tz::__rules_storage_type& __rules_db) {
  // The RULES field of the continuation selects how the sys_info is found:
  // - no rules: a basic lookup without a SAVE,
  // - a SAVE value: a basic lookup with that SAVE,
  // - a name: a lookup in the named rules stored in __rules_db.
  const auto __dispatch = [&](const auto& __rules_field) {
    using _Field = decay_t<decltype(__rules_field)>;
    if constexpr (same_as<_Field, monostate>)
      return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, chrono::seconds(0));
    else if constexpr (same_as<_Field, __tz::__save>)
      return chrono::__get_sys_info_basic(__time, __continuation_begin, __continuation, __rules_field.__time);
    else if constexpr (same_as<_Field, std::string>)
      return chrono::__get_sys_info_rule(
          __time, __continuation_begin, __continuation, __get_rules(__rules_db, __rules_field));
    else
      static_assert(false);

    std::__libcpp_unreachable();
  };
  return std::visit(__dispatch, __continuation.__rules);
}
706 
// The transition from one continuation to the next continuation may result in
// two consecutive continuations with the same "offset" information.
709 // [time.zone.info.sys]/3
710 //   The begin and end data members indicate that, for the associated time_zone
711 //   and time_point, the offset and abbrev are in effect in the range
712 //   [begin, end). This information can be used to efficiently iterate the
713 //   transitions of a time_zone.
714 //
// Note that this wording does not consider a change in only the SAVE field to
// be a different sys_info, whereas zdump does consider this different.
//   LWG XXXX The sys_info range should be affected by save
// aligns the behaviour of the Standard with zdump.
719 //
720 // Iff the "offsets" are the same '__current.__end' is replaced with
721 // '__next.__end', which effectively merges the two objects in one object. The
722 // function returns true if a merge occurred.
723 [[nodiscard]] bool __merge_continuation(sys_info& __current, const sys_info& __next) {
724   if (__current.end != __next.begin)
725     return false;
726 
727   if (__current.offset != __next.offset || __current.abbrev != __next.abbrev || __current.save != __next.save)
728     return false;
729 
730   __current.end = __next.end;
731   return true;
732 }
733 
734 //===----------------------------------------------------------------------===//
735 //                           Public API
736 //===----------------------------------------------------------------------===//
737 
738 [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI time_zone time_zone::__create(unique_ptr<time_zone::__impl>&& __p) {
739   _LIBCPP_ASSERT_NON_NULL(__p != nullptr, "initialized time_zone without a valid pimpl object");
740   time_zone result;
741   result.__impl_ = std::move(__p);
742   return result;
743 }
744 
745 _LIBCPP_EXPORTED_FROM_ABI time_zone::~time_zone() = default;
746 
747 [[nodiscard]] _LIBCPP_EXPORTED_FROM_ABI string_view time_zone::__name() const noexcept { return __impl_->__name(); }
748 
749 [[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI sys_info
750 time_zone::__get_info(sys_seconds __time) const {
751   optional<sys_info> __result;
752   bool __valid_result = false; // true iff __result.has_value() is true and
753                                // __result.begin <= __time < __result.end is true.
754   bool __can_merge                 = false;
755   sys_seconds __continuation_begin = sys_seconds::min();
756   // Iterates over the Zone entry and its continuations. Internally the Zone
757   // entry is split in a Zone information and the first continuation. The last
758   // continuation has no UNTIL field. This means the loop should always find a
759   // continuation.
760   //
761   // For more information on background of zone information please consult the
762   // following information
763   //   [zic manual](https://www.man7.org/linux/man-pages/man8/zic.8.html)
764   //   [tz source info](https://data.iana.org/time-zones/tz-how-to.html)
765   //   On POSIX systems the zdump tool can be useful:
766   //     zdump -v Asia/Hong_Kong
767   //   Gives all transitions in the Hong Kong time zone.
768   //
769   // During iteration the result for the current continuation is returned. If
770   // no continuation is applicable it will return the end time as "error". When
771   // two continuations are contiguous and contain the "same" information these
772   // ranges are merged as one range.
773   // The merging requires keeping any result that occurs before __time,
774   // likewise when a valid result is found the algorithm needs to test the next
775   // continuation to see whether it can be merged. For example, Africa/Ceuta
776   // Continuations
777   //  0 s WE%sT 1929                   (C1)
778   //  0 - WET 1967                     (C2)
779   //  0 Sp WE%sT 1984 Mar 16           (C3)
780   //
781   // Rules
782   //  R s 1926 1929 - O Sa>=1 24s 0 -  (R1)
783   //
784   //  R Sp 1967 o - Jun 3 12 1 S       (R2)
785   //
786   // The rule R1 is the last rule used in C1. The rule R2 is the first rule in
787   // C3. Since R2 is the first rule this means when a continuation uses this
788   // rule its value prior to R2 will be SAVE 0 LETTERS of the first entry with a
789   // SAVE of 0, in this case WET.
790   // This gives the following changes in the information.
791   //   1928-10-07 00:00:00 C1 R1 becomes active: offset 0 save 0 abbrev WET
792   //   1929-01-01 00:00:00 C2    becomes active: offset 0 save 0 abbrev WET
793   //   1967-01-01 00:00:00 C3    becomes active: offset 0 save 0 abbrev WET
794   //   1967-06-03 12:00:00 C3 R2 becomes active: offset 0 save 1 abbrev WEST
795   //
796   // The first 3 entries are contiguous and contain the same information, this
797   // means the period [1928-10-07 00:00:00, 1967-06-03 12:00:00) should be
798   // returned in one sys_info object.
799 
800   const auto& __continuations                  = __impl_->__continuations();
801   const __tz::__rules_storage_type& __rules_db = __impl_->__rules_db();
802   for (auto __it = __continuations.begin(); __it != __continuations.end(); ++__it) {
803     const auto& __continuation   = *__it;
804     __sys_info_result __sys_info = chrono::__get_sys_info(__time, __continuation_begin, __continuation, __rules_db);
805 
806     if (__sys_info) {
807       _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
808           __sys_info->__info.begin < __sys_info->__info.end, "invalid sys_info range");
809 
810       // Filters out dummy entries
811       // Z America/Argentina/Buenos_Aires -3:53:48 - LMT 1894 O 31
812       // ...
813       // -4 A -04/-03 2000 Mar 3 (C1)
814       // -3 A -03/-02            (C2)
815       //
816       // ...
817       // R A 2000 o - Mar 3 0 0 -
818       // R A 2007 o - D 30 0 1 -
819       // ...
820       //
821       // This results in an entry
822       //   [2000-03-03 03:00:00, 2000-03-03 04:00:00) -10800s 60min -03
823       // for [C1 & R1, C1, R2) which due to the end of the continuation is an
824       // one hour "sys_info". Instead the entry should be ignored and replaced
825       // by [C2 & R1, C2 & R2) which is the proper range
826       //   "[2000-03-03 03:00:00, 2007-12-30 03:00:00) -02:00:00 60min -02
827 
828       if (std::holds_alternative<string>(__continuation.__rules) && __sys_info->__can_merge &&
829           __sys_info->__info.begin + 12h > __sys_info->__info.end) {
830         __continuation_begin = __sys_info->__info.begin;
831         continue;
832       }
833 
834       if (!__result) {
835         // First entry found, always keep it.
836         __result = __sys_info->__info;
837 
838         __valid_result = __time >= __result->begin && __time < __result->end;
839         __can_merge    = __sys_info->__can_merge;
840       } else if (__can_merge && chrono::__merge_continuation(*__result, __sys_info->__info)) {
841         // The results are merged, update the result state. This may
842         // "overwrite" a valid sys_info object with another valid sys_info
843         // object.
844         __valid_result = __time >= __result->begin && __time < __result->end;
845         __can_merge    = __sys_info->__can_merge;
846       } else {
847         // Here things get interesting:
848         // For example, America/Argentina/San_Luis
849         //
850         //   -3 A -03/-02 2008 Ja 21           (C1)
851         //   -4 Sa -04/-03 2009 O 11           (C2)
852         //
853         //   R A 2007 o - D 30 0 1 -           (R1)
854         //
855         //   R Sa 2007 2008 - O Su>=8 0 1 -    (R2)
856         //
857         // Based on C1 & R1 the end time of C1 is 2008-01-21 03:00:00
858         // Based on C2 & R2 the end time of C1 is 2008-01-21 02:00:00
859         // In this case the earlier time is the real time of the transition.
860         // However the algorithm used gives 2008-01-21 03:00:00.
861         //
862         // So we need to calculate the previous UNTIL in the current context and
863         // see whether it's earlier.
864 
865         // The results could not be merged.
866         // - When we have a valid result that result is the final result.
867         // - Otherwise the result we had is before __time and the result we got
868         //   is at a later time (possibly valid). This result is always better
869         //   than the previous result.
870         if (__valid_result) {
871           return *__result;
872         } else {
873           _LIBCPP_ASSERT_ARGUMENT_WITHIN_DOMAIN(
874               __it != __continuations.begin(), "the first rule should always seed the result");
875           const auto& __last = *(__it - 1);
876           if (std::holds_alternative<string>(__last.__rules)) {
877             // Europe/Berlin
878             // 1 c CE%sT 1945 May 24 2          (C1)
879             // 1 So CE%sT 1946                  (C2)
880             //
881             // R c 1944 1945 - Ap M>=1 2s 1 S   (R1)
882             //
883             // R So 1945 o - May 24 2 2 M       (R2)
884             //
885             // When C2 becomes active the time would be before the first rule R2,
886             // giving a 1 hour sys_info. This is not valid and the results need
887             // merging.
888 
889             if (__result->end != __sys_info->__info.begin) {
890               // When the UTC gap between the rules is due to the change of
891               // offsets adjust the new time to remove the gap.
892               sys_seconds __end   = __result->end - __result->offset;
893               sys_seconds __begin = __sys_info->__info.begin - __sys_info->__info.offset;
894               if (__end == __begin) {
895                 __sys_info->__info.begin = __result->end;
896               }
897             }
898           }
899 
900           __result       = __sys_info->__info;
901           __valid_result = __time >= __result->begin && __time < __result->end;
902           __can_merge    = __sys_info->__can_merge;
903         }
904       }
905       __continuation_begin = __result->end;
906     } else {
907       __continuation_begin = __sys_info.error();
908     }
909   }
910   if (__valid_result)
911     return *__result;
912 
913   std::__throw_runtime_error("tzdb: corrupt db");
914 }
915 
916 // Is the "__local_time" present in "__first" and "__second". If so the
917 // local_info has an ambiguous result.
918 [[nodiscard]] static bool
919 __is_ambiguous(local_seconds __local_time, const sys_info& __first, const sys_info& __second) {
920   std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset};
921   std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset};
922 
923   return __local_time < __end_first && __local_time >= __begin_second;
924 }
925 
926 // Determines the result of the "__local_time". This expects the object
927 // "__first" to be earlier in time than "__second".
928 [[nodiscard]] static local_info
929 __get_info(local_seconds __local_time, const sys_info& __first, const sys_info& __second) {
930   std::chrono::local_seconds __end_first{__first.end.time_since_epoch() + __first.offset};
931   std::chrono::local_seconds __begin_second{__second.begin.time_since_epoch() + __second.offset};
932 
933   if (__local_time < __end_first) {
934     if (__local_time >= __begin_second)
935       // |--------|
936       //        |------|
937       //         ^
938       return {local_info::ambiguous, __first, __second};
939 
940     // |--------|
941     //          |------|
942     //         ^
943     return {local_info::unique, __first, sys_info{}};
944   }
945 
946   if (__local_time < __begin_second)
947     // |--------|
948     //             |------|
949     //           ^
950     return {local_info::nonexistent, __first, __second};
951 
952   // |--------|
953   //          |------|
954   //           ^
955   return {local_info::unique, __second, sys_info{}};
956 }
957 
958 [[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI local_info
959 time_zone::__get_info(local_seconds __local_time) const {
960   seconds __local_seconds = __local_time.time_since_epoch();
961 
962   /* An example of a typical year with a DST switch displayed in local time.
963    *
964    * At the first of April the time goes forward one hour. This means the
965    * time marked with ~~ is not a valid local time. This is represented by the
966    * nonexistent value in local_info.result.
967    *
968    * At the first of November the time goes backward one hour. This means the
969    * time marked with ^^ happens twice. This is represented by the ambiguous
970    * value in local_info.result.
971    *
972    * 2020.11.01                  2021.04.01              2021.11.01
973    * offset +05                  offset +05              offset +05
974    * save    0s                  save    1h              save    0s
975    * |------------//----------|
976    *                             |---------//--------------|
977    *                                                    |-------------
978    *                           ~~                        ^^
979    *
980    * These shifts can happen due to changes in the current time zone for a
981    * location. For example, Indian/Kerguelen switched only once. In 1950 from an
982    * offset of 0 hours to an offset of +05 hours.
983    *
984    * During all these shifts the UTC time will not have gaps.
985    */
986 
987   // The code needs to determine the system time for the local time. There is no
988   // information available. Assume the offset between system time and local time
989   // is 0s. This gives an initial estimate.
990   sys_seconds __guess{__local_seconds};
991   sys_info __info = __get_info(__guess);
992 
993   // At this point the offset can be used to determine an estimate for the local
994   // time. Before doing that, determine the offset and validate whether the
995   // local time is the range [chrono::local_seconds::min(),
996   // chrono::local_seconds::max()).
997   if (__local_seconds < 0s && __info.offset > 0s)
998     if (__local_seconds - chrono::local_seconds::min().time_since_epoch() < __info.offset)
999       return {-1, __info, {}};
1000 
1001   if (__local_seconds > 0s && __info.offset < 0s)
1002     if (chrono::local_seconds::max().time_since_epoch() - __local_seconds < -__info.offset)
1003       return {-2, __info, {}};
1004 
1005   // Based on the information found in the sys_info, the local time can be
1006   // converted to a system time. This resulting time can be in the following
1007   // locations of the sys_info:
1008   //
1009   //                             |---------//--------------|
1010   //                           1   2.1      2.2         2.3  3
1011   //
1012   // 1. The estimate is before the returned sys_info object.
1013   //    The result is either non-existent or unique in the previous sys_info.
1014   // 2. The estimate is in the sys_info object
1015   //    - If the sys_info begin is not sys_seconds::min(), then it might be at
1016   //      2.1 and could be ambiguous with the previous or unique.
1017   //    - If sys_info end is not sys_seconds::max(), then it might be at 2.3
1018   //      and could be ambiguous with the next or unique.
1019   //    - Else it is at 2.2 and always unique. This case happens when a
1020   //      time zone has no transitions. For example, UTC or GMT+1.
1021   // 3. The estimate is after the returned sys_info object.
1022   //    The result is either non-existent or unique in the next sys_info.
1023   //
1024   // There is no specification where the "middle" starts. Similar issues can
1025   // happen when sys_info objects are "short", then "unique in the next" could
1026   // become "ambiguous in the next and the one following". Theoretically there
1027   // is the option of the following time-line
1028   //
1029   // |------------|
1030   //           |----|
1031   //       |-----------------|
1032   //
1033   // However the local_info object only has 2 sys_info objects, so this option
1034   // is not tested.
1035 
1036   sys_seconds __sys_time{__local_seconds - __info.offset};
1037   if (__sys_time < __info.begin)
1038     // Case 1 before __info
1039     return chrono::__get_info(__local_time, __get_info(__info.begin - 1s), __info);
1040 
1041   if (__sys_time >= __info.end)
1042     // Case 3 after __info
1043     return chrono::__get_info(__local_time, __info, __get_info(__info.end));
1044 
1045   // Case 2 in __info
1046   if (__info.begin != sys_seconds::min()) {
1047     // Case 2.1 Not at the beginning, when not ambiguous the result should test
1048     // case 2.3.
1049     sys_info __prev = __get_info(__info.begin - 1s);
1050     if (__is_ambiguous(__local_time, __prev, __info))
1051       return {local_info::ambiguous, __prev, __info};
1052   }
1053 
1054   if (__info.end == sys_seconds::max())
1055     // At the end so it's case 2.2
1056     return {local_info::unique, __info, sys_info{}};
1057 
1058   // This tests case 2.2 or case 2.3.
1059   return chrono::__get_info(__local_time, __info, __get_info(__info.end));
1060 }
1061 
1062 } // namespace chrono
1063 
1064 _LIBCPP_END_NAMESPACE_STD
1065