Travel plans 🚂
Result Set
Regardless of the database, the result set should look like:
route | departure_datetime_utc | arrival_datetime_utc | duration | cost |
---|---|---|---|---|
New York - London Gatwick - London St Pancras - Paris | 2024-01-01 18:00:00 | 2024-01-02 14:30:00 | 20:30:00 | 212.00 |
New York - Paris | 2024-01-01 23:00:00 | 2024-01-02 16:45:00 | 17:45:00 | 279.00 |
Expand for the DDL
solution(route, departure_datetime_utc, arrival_datetime_utc, duration, cost) as (
values
('New York - London Gatwick - London St Pancras - Paris', '2024-01-01 18:00:00'::timestamp, '2024-01-02 14:30:00'::timestamp, '20:30:00', 212.00),
('New York - Paris', '2024-01-01 23:00:00'::timestamp, '2024-01-02 16:45:00'::timestamp, '17:45:00', 279.00)
)
Solution
Some SQL solutions per database are provided below.
DuckDB
There's probably a better way to do this. Please let me know if you find it!
/*
In DuckDB, we can convert a timestamp with a timezone to a timestamp
in UTC by casting it to a varchar and then to a timestamp.
*/
with recursive
date_axis as (
select
/* give us a day either side to be safe */
min(departure_datetime::date) - 1 as date_,
max(arrival_datetime::date) + 1 as max_date,
from routes_timetable
union all
select
date_ + interval '1 day',
max_date,
from date_axis
where date_ < max_date - 1 /* account for final loop */
),
scheduled_timetable as (
select
schedule_id,
from_location,
to_location,
cost,
duration::varchar::interval as duration,
earliest_departure::varchar::time as departure_time_utc,
departure_time_utc + duration::varchar::interval as arrival_time_utc,
latest_departure::varchar::time as limit_,
frequency::varchar::interval as frequency,
from routes_schedule
union all
select
schedule_id,
from_location,
to_location,
cost,
duration,
departure_time_utc + frequency as departure_time_utc,
arrival_time_utc + frequency as arrival_time_utc,
limit_,
frequency,
from scheduled_timetable
where departure_time_utc <= limit_ - frequency /* account for the final loop */
and frequency is not null /* daily schedules don't need to be expanded */
),
timetable as (
select
from_location,
to_location,
departure_datetime::varchar::timestamp as departure_datetime_utc,
arrival_datetime::varchar::timestamp as arrival_datetime_utc,
arrival_datetime_utc - departure_datetime_utc as duration,
cost,
from routes_timetable
union all
select
scheduled_timetable.from_location,
scheduled_timetable.to_location,
scheduled_timetable.departure_time_utc + date_axis.date_,
scheduled_timetable.arrival_time_utc + date_axis.date_,
scheduled_timetable.duration,
scheduled_timetable.cost,
from scheduled_timetable
cross join date_axis
),
routes as (
select
from_location as starting_location,
departure_datetime_utc as starting_departure_datetime_utc,
from_location,
to_location,
departure_datetime_utc,
arrival_datetime_utc,
cost,
from_location || ' - ' || to_location as route,
from timetable
where 1=1
and from_location = 'New York'
and departure_datetime_utc >= '2024-01-01 12:00:00-05:00'
union all
select
routes.starting_location,
routes.starting_departure_datetime_utc,
timetable.from_location,
timetable.to_location,
timetable.departure_datetime_utc,
timetable.arrival_datetime_utc,
routes.cost + timetable.cost,
routes.route || ' - ' || timetable.to_location,
from routes
inner join timetable
on routes.to_location = timetable.from_location
and timetable.departure_datetime_utc between routes.arrival_datetime_utc + interval '30 minutes'
and routes.arrival_datetime_utc + interval '6 hours'
)
select distinct
route,
starting_departure_datetime_utc as departure_datetime_utc,
arrival_datetime_utc,
arrival_datetime_utc - starting_departure_datetime_utc as duration,
cost
from routes
where to_location = 'Paris'
qualify 0=1
/* fastest route */
or 1 = row_number() over (order by duration, cost, route)
/* cheapest route */
or 1 = row_number() over (order by cost, duration, route)
order by arrival_datetime_utc