rs-dublinbus/src/gtfs/loader.rs

211 lines
5.3 KiB
Rust

use gtfs_structures::{Calendar, CalendarDate, RawStopTime, RawTrip, Route, Stop};
use serde::de::DeserializeOwned;
use std::{
collections::{HashMap, HashSet},
fs::File,
hash::Hash,
};
use zip::ZipArchive;
use crate::gtfs::{
Gtfs,
utils::{route_ids_from_numbers, stop_ids_from_codes},
};
/// Predicate consulted for each deserialized record to decide whether it is kept.
trait Filter<T> {
    /// Returns `true` when `record` should be kept and `false` when it should be discarded.
    fn accept(&self, record: &T) -> bool;
}
/// Filter that applies no restriction: every record is accepted.
struct LoadAll {}

impl<T> Filter<T> for LoadAll {
    /// Always returns `true`; the record itself is never inspected.
    fn accept(&self, _record: &T) -> bool {
        true
    }
}
/// Filter that keeps only the routes whose short name is in a given set.
struct LoadRoutes<'a> {
    /// Route short names to keep.
    routes: &'a HashSet<String>,
}

impl Filter<Route> for LoadRoutes<'_> {
    /// Accepts `r` when it has a short name and that name is contained in `routes`.
    ///
    /// Routes without a short name are rejected.
    fn accept(&self, r: &Route) -> bool {
        // Match on the option directly instead of `is_some()` followed by `unwrap()`.
        match &r.short_name {
            Some(name) => self.routes.contains(name),
            None => false,
        }
    }
}
/// Filter that keeps only the stops whose code is in a given set.
struct LoadStops<'a> {
    /// Stop codes to keep.
    stops: &'a HashSet<String>,
}

impl Filter<Stop> for LoadStops<'_> {
    /// Accepts `s` when it has a code and that code is contained in `stops`.
    ///
    /// Stops without a code are rejected.
    fn accept(&self, s: &Stop) -> bool {
        // Match on the option directly instead of `is_some()` followed by `unwrap()`.
        match &s.code {
            Some(code) => self.stops.contains(code),
            None => false,
        }
    }
}
/// Filter that keeps only the trips belonging to one of a given set of route ids.
struct LoadTrips<'a> {
    /// Route ids whose trips are kept.
    route_ids: &'a HashSet<String>,
}

impl Filter<RawTrip> for LoadTrips<'_> {
    /// Accepts `t` when its `route_id` is contained in `route_ids`.
    fn accept(&self, t: &RawTrip) -> bool {
        self.route_ids.contains(&t.route_id)
    }
}
/// Filter that keeps a stop time only when both its trip and its stop are in the given sets.
struct LoadStopTimes<'a> {
    /// Trip ids whose stop times are kept.
    trip_ids: &'a HashSet<String>,
    /// Stop ids whose stop times are kept.
    stop_ids: &'a HashSet<String>,
}

impl Filter<RawStopTime> for LoadStopTimes<'_> {
    /// Accepts `st` when its `stop_id` is in `stop_ids` and its `trip_id` is in `trip_ids`.
    fn accept(&self, st: &RawStopTime) -> bool {
        // The stop is checked first; the trip lookup only happens for matching stops.
        if !self.stop_ids.contains(&st.stop_id) {
            return false;
        }
        self.trip_ids.contains(&st.trip_id)
    }
}
/// Deserializes every CSV row of `table_name` in the archive and appends it to `destination`.
///
/// Rows are appended in file order; anything already in `destination` is kept.
///
/// # Panics
///
/// Panics, naming the table, if `table_name` cannot be opened in the archive or if any row
/// fails to deserialize into `T`.
fn load_vector<T: serde::de::DeserializeOwned>(
    destination: &mut Vec<T>,
    zip_reader: &mut ZipArchive<File>,
    table_name: &str,
) {
    let file_reader = zip_reader
        .by_name(table_name)
        .unwrap_or_else(|err| panic!("failed to open {} in archive: {:?}", table_name, err));
    let mut rdr = csv::Reader::from_reader(file_reader);
    destination.extend(rdr.deserialize::<T>().map(|row| {
        row.unwrap_or_else(|err| panic!("failed to deserialize row of {}: {:?}", table_name, err))
    }));
}
/// Deserializes the CSV rows of `table_name` into `destination`, keyed by `index`.
///
/// Each row is deserialized into a `V`; rows rejected by `filter` are dropped. Accepted rows
/// are inserted under the key computed by `index`, so a later row with the same key replaces
/// an earlier one.
///
/// # Panics
///
/// Panics, naming the table, if `table_name` cannot be opened in the archive or if any row
/// fails to deserialize into `V`. The underlying error is part of the panic message.
fn load_map<K, V, IndexFn, FilterT>(
    destination: &mut HashMap<K, V>,
    zip_reader: &mut ZipArchive<File>,
    table_name: &str,
    index: IndexFn,
    filter: FilterT,
) where
    K: Eq + Hash,
    V: DeserializeOwned,
    IndexFn: Fn(&V) -> K,
    FilterT: Filter<V>,
{
    let file_reader = zip_reader
        .by_name(table_name)
        .unwrap_or_else(|err| panic!("failed to open {} in archive: {:?}", table_name, err));
    let mut rdr = csv::Reader::from_reader(file_reader);
    for row in rdr.deserialize() {
        let record: V = match row {
            Ok(record) => record,
            // Carry the error in the panic itself rather than printing it to stdout first.
            Err(err) => panic!("failed to deserialize row of {}: {:#?}", table_name, err),
        };
        if filter.accept(&record) {
            let key = index(&record);
            destination.insert(key, record);
        }
    }
}
/// Deserializes the CSV rows of `table_name` and groups them in `destination` by `index`.
///
/// Each row is deserialized into a `V`; rows rejected by `filter` are dropped. Accepted rows
/// are appended, in file order, to the vector stored under the key computed by `index`.
///
/// # Panics
///
/// Panics, naming the table, if `table_name` cannot be opened in the archive or if any row
/// fails to deserialize into `V`.
fn load_vector_map<K, V, IndexFn, FilterT>(
    destination: &mut HashMap<K, Vec<V>>,
    zip_reader: &mut ZipArchive<File>,
    table_name: &str,
    index: IndexFn,
    filter: FilterT,
) where
    K: Eq + Hash,
    V: DeserializeOwned,
    IndexFn: Fn(&V) -> K,
    FilterT: Filter<V>,
{
    let file_reader = zip_reader
        .by_name(table_name)
        .unwrap_or_else(|err| panic!("failed to open {} in archive: {:?}", table_name, err));
    let mut rdr = csv::Reader::from_reader(file_reader);
    for row in rdr.deserialize() {
        let record: V = match row {
            Ok(record) => record,
            Err(err) => panic!("failed to deserialize row of {}: {:#?}", table_name, err),
        };
        if filter.accept(&record) {
            let key = index(&record);
            destination.entry(key).or_default().push(record);
        }
    }
}
/// Populates `gtfs` from the GTFS zip archive, restricted to the requested routes and stops.
///
/// Agencies, calendars and calendar dates are loaded without filtering. Routes are kept when
/// their short name is in `route_numbers`, and stops when their code is in `stop_codes`. Trips
/// are kept when their route id is one returned by `route_ids_from_numbers`. Stop times are
/// kept when their trip was loaded and their stop id is one returned by `stop_ids_from_codes`.
///
/// The step order matters: both helper lookups read `gtfs` after routes and stops have been
/// loaded, and the stop-time filter uses the keys of the already loaded trips.
///
/// # Panics
///
/// Panics if a table cannot be opened in the archive or a row fails to deserialize, because
/// the loader helpers called here panic in those cases.
pub fn load_gtfs(
    gtfs: &mut Gtfs,
    zip_reader: &mut ZipArchive<File>,
    route_numbers: &HashSet<String>,
    stop_codes: &HashSet<String>,
) {
    // Tables taken as-is: agencies, calendars, and calendar dates.
    load_vector(&mut gtfs.agencies, zip_reader, "agency.txt");
    load_map(
        &mut gtfs.calendar,
        zip_reader,
        "calendar.txt",
        |calendar: &Calendar| calendar.id.clone(),
        LoadAll {},
    );
    load_vector_map(
        &mut gtfs.calendar_dates,
        zip_reader,
        "calendar_dates.txt",
        |date: &CalendarDate| date.service_id.clone(),
        LoadAll {},
    );

    // Routes and stops, narrowed to the caller's selection.
    load_map(
        &mut gtfs.routes,
        zip_reader,
        "routes.txt",
        |route: &Route| route.id.clone(),
        LoadRoutes {
            routes: route_numbers,
        },
    );
    load_map(
        &mut gtfs.stops,
        zip_reader,
        "stops.txt",
        |stop: &Stop| stop.id.clone(),
        LoadStops { stops: stop_codes },
    );

    // Trips: only those running on the routes that were just loaded.
    let route_ids = route_ids_from_numbers(&gtfs, &route_numbers);
    load_map(
        &mut gtfs.trips,
        zip_reader,
        "trips.txt",
        |trip: &RawTrip| trip.id.clone(),
        LoadTrips {
            route_ids: &route_ids,
        },
    );

    // Stop times: only where a loaded trip serves one of the chosen stops.
    let stop_ids = stop_ids_from_codes(&gtfs, &stop_codes);
    let trip_ids: HashSet<String> = gtfs.trips.keys().cloned().collect();
    load_map(
        &mut gtfs.stop_times,
        zip_reader,
        "stop_times.txt",
        |stop_time: &RawStopTime| (stop_time.trip_id.clone(), stop_time.stop_sequence),
        LoadStopTimes {
            trip_ids: &trip_ids,
            stop_ids: &stop_ids,
        },
    );
}