LibTimeZone: Begin generating GMT offset rules for each time zone

This is a rather naive implementation, but serves as a first pass at
determining the GMT offset for a time zone at a particular point in
time. This implementation ignores DST (because we are not parsing any
RULE entries yet), and ignores any UNTIL date patterns of the form "Mon>=4"
or "lastSun".
This commit is contained in:
Timothy Flynn 2022-01-10 16:56:09 -05:00 committed by Linus Groh
parent e9c42d0bc5
commit 09c0324880
Notes: sideshowbarker 2024-07-17 21:13:15 +09:00
4 changed files with 199 additions and 34 deletions

View file

@ -5,6 +5,7 @@
*/
#include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common.
#include <AK/Format.h>
#include <AK/HashMap.h>
#include <AK/SourceGenerator.h>
#include <AK/String.h>
@ -15,45 +16,61 @@
namespace {
// Hour/minute/second triple filled in by parse_time() from a ':'-separated time field.
struct Time {
// Hour component. Signed because parse_time() reads it with to_int(), so a leading '-' is accepted.
i8 hour { 0 };
// Minute component; stays 0 when the parsed field has no minutes part.
u8 minute { 0 };
// Second component; stays 0 when the parsed field has no seconds part.
u8 second { 0 };
};
struct DateTime {
u16 year { 0 };
Optional<u8> month;
Optional<u8> day;
Optional<u8> last_weekday;
Optional<u8> after_weekday;
Optional<u8> day;
Optional<Time> time;
Optional<u8> hour;
Optional<u8> minute;
Optional<u8> second;
};
struct TimeZone {
Time offset;
struct TimeZoneOffset {
i64 offset { 0 };
Optional<DateTime> until;
};
struct TimeZoneData {
HashMap<String, Vector<TimeZone>> time_zones;
HashMap<String, Vector<TimeZoneOffset>> time_zones;
Vector<String> time_zone_names;
Vector<Alias> time_zone_aliases;
};
// Parses a "H[:MM[:SS]]" field into a Time. The hour is mandatory and may be signed;
// minutes and seconds default to 0 when absent. Any component that fails to parse
// trips the VERIFY inside Optional::value().
static Time parse_time(StringView segment)
{
    // FIXME: Some times end with a letter, e.g. "2:00u" and "2:00s". Figure out what this means and handle it.
    auto const parts = segment.split_view(':');

    // Only the first two characters of an optional component are parsed, which drops
    // any single trailing letter suffix such as the 'u' in "00u".
    auto two_digit_field_or_zero = [&](size_t index) -> u8 {
        if (parts.size() <= index)
            return 0;
        return static_cast<u8>(parts[index].substring_view(0, 2).to_uint().value());
    };

    Time result {};
    result.hour = parts[0].to_int().value();
    result.minute = two_digit_field_or_zero(1);
    result.second = two_digit_field_or_zero(2);
    return result;
}
// Formats a DateTime as a brace-enclosed list of eight integers, in the order
// year, month, day, last_weekday, after_weekday, hour, minute, second.
// Unset optional fields are written as 1 for month and day, and as 0 for everything else.
template<>
struct AK::Formatter<DateTime> : Formatter<FormatString> {
    ErrorOr<void> format(FormatBuilder& builder, DateTime const& date_time)
    {
        // Resolve every optional field to its fallback up front so the format call stays flat.
        auto const month = date_time.month.value_or(1);
        auto const day = date_time.day.value_or(1);
        auto const last_weekday = date_time.last_weekday.value_or(0);
        auto const after_weekday = date_time.after_weekday.value_or(0);
        auto const hour = date_time.hour.value_or(0);
        auto const minute = date_time.minute.value_or(0);
        auto const second = date_time.second.value_or(0);

        return Formatter<FormatString>::format(builder,
            "{{ {}, {}, {}, {}, {}, {}, {}, {} }}",
            date_time.year, month, day, last_weekday, after_weekday, hour, minute, second);
    }
};
// Formats a TimeZoneOffset as a brace-enclosed list of three values:
// the offset, the `until` DateTime (a default-constructed DateTime when unset),
// and a boolean telling whether `until` was actually present.
template<>
struct AK::Formatter<TimeZoneOffset> : Formatter<FormatString> {
    ErrorOr<void> format(FormatBuilder& builder, TimeZoneOffset const& time_zone_offset)
    {
        DateTime const until = time_zone_offset.until.value_or({});
        bool const has_until = time_zone_offset.until.has_value();

        return Formatter<FormatString>::format(builder, "{{ {}, {}, {} }}", time_zone_offset.offset, until, has_until);
    }
};
static Optional<DateTime> parse_date_time(Span<StringView const> segments)
{
constexpr auto months = Array { "Jan"sv, "Feb"sv, "Mar"sv, "Apr"sv, "May"sv, "Jun"sv, "Jul"sv, "Aug"sv, "Sep"sv, "Oct"sv, "Nov"sv, "Dec"sv };
@ -69,7 +86,7 @@ static Optional<DateTime> parse_date_time(Span<StringView const> segments)
date_time.year = segments[0].to_uint().value();
if (segments.size() > 1)
date_time.month = find_index(months.begin(), months.end(), segments[1]);
date_time.month = find_index(months.begin(), months.end(), segments[1]) + 1;
if (segments.size() > 2) {
if (segments[2].starts_with("last"sv)) {
@ -86,13 +103,31 @@ static Optional<DateTime> parse_date_time(Span<StringView const> segments)
}
}
if (segments.size() > 3)
date_time.time = parse_time(segments[3]);
if (segments.size() > 3) {
// FIXME: Some times end with a letter, e.g. "2:00u" and "2:00s". Figure out what this means and handle it.
auto time_segments = segments[3].split_view(':');
date_time.hour = time_segments[0].to_int().value();
date_time.minute = time_segments.size() > 1 ? time_segments[1].substring_view(0, 2).to_uint().value() : 0;
date_time.second = time_segments.size() > 2 ? time_segments[2].substring_view(0, 2).to_uint().value() : 0;
}
return date_time;
}
static Vector<TimeZone>& parse_zone(StringView zone_line, TimeZoneData& time_zone_data)
// Parses an offset field of the form "[-]H[:MM[:SS]]" into a signed number of seconds.
// The hour is mandatory; minutes and seconds default to 0 when absent. Any component
// that fails to parse trips the VERIFY inside Optional::value().
static i64 parse_time_offset(StringView segment)
{
    auto segments = segment.split_view(':');

    i64 hours = segments[0].to_int().value();
    i64 minutes = segments.size() > 1 ? segments[1].to_uint().value() : 0;
    i64 seconds = segments.size() > 2 ? segments[2].to_uint().value() : 0;

    // The sign has to come from the text rather than from the parsed hour: a negative
    // zero hour ("-0:01:15", "-00:30") parses to 0 and would otherwise lose its sign.
    // Checking for a leading '-' covers those spellings as well as any negative hour.
    i64 sign = segments[0].starts_with('-') ? -1 : 1;

    // `hours` already carries its own sign, so only the minutes/seconds part is signed here.
    return (hours * 3600) + sign * ((minutes * 60) + seconds);
}
static Vector<TimeZoneOffset>& parse_zone(StringView zone_line, TimeZoneData& time_zone_data)
{
auto segments = zone_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
@ -100,8 +135,8 @@ static Vector<TimeZone>& parse_zone(StringView zone_line, TimeZoneData& time_zon
VERIFY(segments[0] == "Zone"sv);
auto name = segments[1];
TimeZone time_zone {};
time_zone.offset = parse_time(segments[2]);
TimeZoneOffset time_zone {};
time_zone.offset = parse_time_offset(segments[2]);
if (segments.size() > 5)
time_zone.until = parse_date_time(segments.span().slice(5));
@ -115,13 +150,13 @@ static Vector<TimeZone>& parse_zone(StringView zone_line, TimeZoneData& time_zon
return time_zones;
}
static void parse_zone_continuation(StringView zone_line, Vector<TimeZone>& time_zones)
static void parse_zone_continuation(StringView zone_line, Vector<TimeZoneOffset>& time_zones)
{
auto segments = zone_line.split_view_if([](char ch) { return (ch == '\t') || (ch == ' '); });
// STDOFF RULES FORMAT [UNTIL]
TimeZone time_zone {};
time_zone.offset = parse_time(segments[0]);
TimeZoneOffset time_zone {};
time_zone.offset = parse_time_offset(segments[0]);
if (segments.size() > 3)
time_zone.until = parse_date_time(segments.span().slice(3));
@ -145,7 +180,7 @@ static ErrorOr<void> parse_time_zones(StringView time_zone_path, TimeZoneData& t
{
// For reference, the man page for `zic` has the best documentation of the TZDB file format.
auto file = TRY(Core::File::open(time_zone_path, Core::OpenMode::ReadOnly));
Vector<TimeZone>* last_parsed_zone = nullptr;
Vector<TimeZoneOffset>* last_parsed_zone = nullptr;
while (file->can_read_line()) {
auto line = file->read_line();
@ -224,13 +259,72 @@ static void generate_time_zone_data_implementation(Core::File& file, TimeZoneDat
#include <AK/Array.h>
#include <AK/BinarySearch.h>
#include <AK/Optional.h>
#include <AK/Span.h>
#include <AK/StringView.h>
#include <AK/Time.h>
#include <LibTimeZone/TimeZone.h>
#include <LibTimeZone/TimeZoneData.h>
namespace TimeZone {
static constexpr auto seconds_per_day = 86'400;
static constexpr auto seconds_per_hour = 3'600;
static constexpr auto seconds_per_minute = 60;
struct DateTime {
AK::Time time_since_epoch() const
{
// FIXME: This implementation does not take last_weekday or after_weekday into account.
i64 seconds_since_epoch = AK::days_since_epoch(year, month, day);
seconds_since_epoch *= seconds_per_day;
seconds_since_epoch += hour * seconds_per_hour;
seconds_since_epoch += minute * seconds_per_minute;
seconds_since_epoch += second;
return AK::Time::from_seconds(seconds_since_epoch);
}
u16 year { 0 };
u8 month { 1 };
u8 day { 1 };
u8 last_weekday { 0 };
u8 after_weekday { 0 };
u8 hour { 0 };
u8 minute { 0 };
u8 second { 0 };
};
struct TimeZoneOffset {
i64 offset { 0 };
DateTime until {};
bool has_until { false };
};
)~~~");
auto append_time_zone_offsets = [&](auto const& name, auto const& time_zone_offsets) {
generator.set("name", name);
generator.set("size", String::number(time_zone_offsets.size()));
generator.append(R"~~~(
static constexpr Array<TimeZoneOffset, @size@> @name@ { {
)~~~");
for (auto const& time_zone_offset : time_zone_offsets)
generator.append(String::formatted(" {},\n", time_zone_offset));
generator.append("} };\n");
};
generate_mapping(generator, time_zone_data.time_zone_names, "TimeZoneOffset"sv, "s_time_zone_offsets"sv, "s_time_zone_offsets_{}", format_identifier,
[&](auto const& name, auto const& value) {
auto const& time_zone_offsets = time_zone_data.time_zones.find(value)->value;
append_time_zone_offsets(name, time_zone_offsets);
});
auto append_string_conversions = [&](StringView enum_title, StringView enum_snake, auto const& values, auto const& aliases) {
HashValueMap<String> hashes;
hashes.ensure_capacity(values.size());
@ -254,14 +348,29 @@ namespace TimeZone {
append_string_conversions("TimeZone"sv, "time_zone"sv, time_zone_data.time_zone_names, time_zone_data.time_zone_aliases);
generator.append(R"~~~(
Optional<i64> get_time_zone_offset(TimeZone time_zone, AK::Time time)
{
// FIXME: This implementation completely ignores DST.
auto const& time_zone_offsets = s_time_zone_offsets[to_underlying(time_zone)];
size_t index = 0;
for (; index < time_zone_offsets.size(); ++index) {
auto const& time_zone_offset = time_zone_offsets[index];
if (!time_zone_offset.has_until || (time_zone_offset.until.time_since_epoch() > time))
break;
}
VERIFY(index < time_zone_offsets.size());
return time_zone_offsets[index].offset;
}
}
)~~~");
VERIFY(file.write(generator.as_string_view()));
}
}
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
StringView generated_header_path;

View file

@ -7,6 +7,7 @@
#include <LibTest/TestCase.h>
#include <AK/StringView.h>
#include <AK/Time.h>
#include <LibTimeZone/TimeZone.h>
#if ENABLE_TIME_ZONE_DATA
@ -84,4 +85,45 @@ TEST_CASE(canonicalize_time_zone)
EXPECT(!TimeZone::canonicalize_time_zone("I don't exist"sv).has_value());
}
// Checks TimeZone::get_time_zone_offset() for a handful of named zones at fixed timestamps.
TEST_CASE(get_time_zone_offset)
{
// Builds an expected offset in seconds from a sign and an hours/minutes/seconds triple.
// Note that arguments written as 00 / 01 below are octal literals; their values are still 0 and 1.
auto offset = [](i64 sign, i64 hours, i64 minutes, i64 seconds) {
return sign * ((hours * 3600) + (minutes * 60) + seconds);
};
// Looks up the offset of `time_zone` at `time` (seconds relative to the epoch, passed through
// AK::Time::from_seconds) and compares it against `expected_offset`. A missing result aborts via VERIFY.
auto test_offset = [](auto time_zone, i64 time, i64 expected_offset) {
auto actual_offset = TimeZone::get_time_zone_offset(time_zone, AK::Time::from_seconds(time));
VERIFY(actual_offset.has_value());
EXPECT_EQ(*actual_offset, expected_offset);
};
// Each pair below probes a timestamp just before an expected offset change and the timestamp at the change.
test_offset("America/Chicago"sv, -2717668237, offset(-1, 5, 50, 36)); // Sunday, November 18, 1883 12:09:23 PM
test_offset("America/Chicago"sv, -2717668236, offset(-1, 6, 00, 00)); // Sunday, November 18, 1883 12:09:24 PM
test_offset("America/Chicago"sv, -1067810460, offset(-1, 6, 00, 00)); // Sunday, March 1, 1936 1:59:00 AM
test_offset("America/Chicago"sv, -1067810400, offset(-1, 5, 00, 00)); // Sunday, March 1, 1936 2:00:00 AM
test_offset("America/Chicago"sv, -1045432860, offset(-1, 5, 00, 00)); // Sunday, November 15, 1936 1:59:00 AM
test_offset("America/Chicago"sv, -1045432800, offset(-1, 6, 00, 00)); // Sunday, November 15, 1936 2:00:00 AM
test_offset("Europe/London"sv, -3852662401, offset(-1, 0, 01, 15)); // Tuesday, November 30, 1847 11:59:59 PM
test_offset("Europe/London"sv, -3852662400, offset(+1, 0, 00, 00)); // Wednesday, December 1, 1847 12:00:00 AM
test_offset("Europe/London"sv, -37238401, offset(+1, 0, 00, 00)); // Saturday, October 26, 1968 11:59:59 PM
test_offset("Europe/London"sv, -37238400, offset(+1, 1, 00, 00)); // Sunday, October 27, 1968 12:00:00 AM
test_offset("Europe/London"sv, 57722399, offset(+1, 1, 00, 00)); // Sunday, October 31, 1971 1:59:59 AM
test_offset("Europe/London"sv, 57722400, offset(+1, 0, 00, 00)); // Sunday, October 31, 1971 2:00:00 AM
// UTC is expected to report a zero offset regardless of the timestamp.
test_offset("UTC"sv, -1641846268, offset(+1, 0, 00, 00));
test_offset("UTC"sv, 0, offset(+1, 0, 00, 00));
test_offset("UTC"sv, 1641846268, offset(+1, 0, 00, 00));
// For the Etc/GMT±N zones the expected offset has the opposite sign to the one in the zone name.
test_offset("Etc/GMT+4"sv, -1641846268, offset(-1, 4, 00, 00));
test_offset("Etc/GMT+5"sv, 0, offset(-1, 5, 00, 00));
test_offset("Etc/GMT+6"sv, 1641846268, offset(-1, 6, 00, 00));
test_offset("Etc/GMT-12"sv, -1641846268, offset(+1, 12, 00, 00));
test_offset("Etc/GMT-13"sv, 0, offset(+1, 13, 00, 00));
test_offset("Etc/GMT-14"sv, 1641846268, offset(+1, 14, 00, 00));
// An unrecognized zone name must yield an empty Optional rather than an offset.
EXPECT(!TimeZone::get_time_zone_offset("I don't exist"sv, {}).has_value());
}
#endif

View file

@ -24,4 +24,13 @@ Optional<StringView> canonicalize_time_zone(StringView time_zone)
return canonical_time_zone;
}
// Weak fallback definition: ignores both arguments and reports that no offset is available.
// NOTE(review): presumably overridden at link time by the generated implementation when time zone data is built in — confirm.
Optional<i64> __attribute__((weak)) get_time_zone_offset(TimeZone, AK::Time) { return {}; }
// Convenience overload taking the time zone by name. The name is resolved with
// time_zone_from_string(); an unrecognized name yields an empty Optional, otherwise
// the lookup is forwarded to the TimeZone-enum overload.
Optional<i64> get_time_zone_offset(StringView time_zone, AK::Time time)
{
    auto const resolved_time_zone = time_zone_from_string(time_zone);
    if (!resolved_time_zone.has_value())
        return {};

    return get_time_zone_offset(resolved_time_zone.value(), time);
}
}

View file

@ -8,6 +8,8 @@
#include <AK/Optional.h>
#include <AK/StringView.h>
#include <AK/Time.h>
#include <AK/Types.h>
#include <LibTimeZone/Forward.h>
namespace TimeZone {
@ -16,4 +18,7 @@ Optional<TimeZone> time_zone_from_string(StringView time_zone);
StringView time_zone_to_string(TimeZone time_zone);
Optional<StringView> canonicalize_time_zone(StringView time_zone);
// Returns the offset, in seconds, that applies to `time_zone` at the given point in time,
// or an empty Optional when no offset is available.
Optional<i64> get_time_zone_offset(TimeZone time_zone, AK::Time time);
// Same lookup, with the time zone identified by name; an unrecognized name yields an empty Optional.
Optional<i64> get_time_zone_offset(StringView time_zone, AK::Time time);
}