Rearranged the existing code to make it more manageable.
This commit is contained in:
parent
774d4e4c80
commit
a22e1ee3fa
|
|
@ -1,32 +1,9 @@
|
||||||
use gtfs_structures::{Agency, Calendar, CalendarDate, RawStopTime, RawTrip, Route, Stop};
|
use std::{collections::{HashMap, HashSet}, fs::File, hash::Hash};
|
||||||
use serde::{self, de::DeserializeOwned};
|
use gtfs_structures::{Calendar, CalendarDate, RawStopTime, RawTrip, Route, Stop};
|
||||||
use std::{
|
use serde::de::DeserializeOwned;
|
||||||
collections::{HashMap, HashSet},
|
|
||||||
fs::File,
|
|
||||||
hash::Hash,
|
|
||||||
};
|
|
||||||
|
|
||||||
use zip::ZipArchive;
|
use zip::ZipArchive;
|
||||||
|
|
||||||
|
use crate::gtfs::{structs::Gtfs, utils::{route_ids_from_numbers, stop_ids_from_codes}};
|
||||||
// The main GTFS struct. This is similar to (but not exactly) gtfs-structures::Gtfs because we don't need everything
|
|
||||||
#[derive(Debug)]
|
|
||||||
pub struct Gtfs {
|
|
||||||
/// All agencies. They can not be read by `agency_id`, as it is not a required field
|
|
||||||
pub agencies: Vec<Agency>,
|
|
||||||
/// All Calendar by `service_id`
|
|
||||||
pub calendar: HashMap<String, Calendar>,
|
|
||||||
/// All calendar dates grouped by service_id
|
|
||||||
pub calendar_dates: HashMap<String, Vec<CalendarDate>>,
|
|
||||||
/// All routes by `route_id`
|
|
||||||
pub routes: HashMap<String, Route>,
|
|
||||||
/// All stop by `stop_id`.
|
|
||||||
pub stops: HashMap<String, Stop>,
|
|
||||||
/// All trips by trip_id
|
|
||||||
pub trips: HashMap<String, RawTrip>,
|
|
||||||
/// Stop times for the chosen stops and the chosen routes
|
|
||||||
pub stop_times: HashMap<(String, u32), RawStopTime>,
|
|
||||||
}
|
|
||||||
|
|
||||||
trait Filter<T> {
|
trait Filter<T> {
|
||||||
fn accept(&self, v: &T) -> bool;
|
fn accept(&self, v: &T) -> bool;
|
||||||
|
|
@ -127,70 +104,40 @@ fn load_map<K, V, IndexFn, FilterT>(
|
||||||
|
|
||||||
// Loads a HashMap of a vector of the selected type, using the provided index function as the key
|
// Loads a HashMap of a vector of the selected type, using the provided index function as the key
|
||||||
// And a predicate as a filter
|
// And a predicate as a filter
|
||||||
fn load_vector_map<'a, V: DeserializeOwned + Clone>(
|
fn load_vector_map<'a, K, V, IndexFn, FilterT>(
|
||||||
destination: &mut HashMap<String, Vec<V>>,
|
destination: &mut HashMap<K, Vec<V>>,
|
||||||
zip_reader: &mut ZipArchive<File>,
|
zip_reader: &mut ZipArchive<File>,
|
||||||
table_name: &str,
|
table_name: &str,
|
||||||
index: fn(&V) -> String,
|
index: IndexFn,
|
||||||
filter: impl Filter<V>,
|
filter: FilterT,
|
||||||
) {
|
) where
|
||||||
|
K: Eq + Hash,
|
||||||
|
V: DeserializeOwned,
|
||||||
|
IndexFn: Fn(&V) -> K,
|
||||||
|
FilterT: Filter<V>,
|
||||||
|
{
|
||||||
let file_reader = zip_reader.by_name(table_name).unwrap();
|
let file_reader = zip_reader.by_name(table_name).unwrap();
|
||||||
let mut rdr = csv::Reader::from_reader(file_reader);
|
let mut rdr = csv::Reader::from_reader(file_reader);
|
||||||
|
|
||||||
for row in rdr.deserialize() {
|
for row in rdr.deserialize() {
|
||||||
let record: V = row.unwrap();
|
let record: V = row.unwrap();
|
||||||
if filter.accept(&record) {
|
if filter.accept(&record) {
|
||||||
let idx: String = index(&record);
|
let idx = index(&record);
|
||||||
destination.entry(idx).or_insert_with(Vec::new).push(record);
|
destination.entry(idx).or_insert_with(Vec::new).push(record);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn stop_ids_from_codes(gtfs: &Gtfs, stop_codes: &HashSet<String>) -> HashSet<String> {
|
|
||||||
let mut ids: HashSet<String> = HashSet::new();
|
|
||||||
|
|
||||||
for stop in &gtfs.stops {
|
pub fn load_gtfs(gtfs: &mut Gtfs, zip_reader: &mut ZipArchive<File>, route_numbers: HashSet<String>, stop_codes: HashSet<String>) {
|
||||||
let stop_number = stop.1.code.as_ref();
|
|
||||||
if stop_number.is_some() && stop_codes.contains(stop_number.unwrap().as_str()) {
|
|
||||||
ids.insert(stop.0.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ids;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn route_ids_from_numbers(gtfs: &Gtfs, route_numbers: &HashSet<String>) -> HashSet<String> {
|
|
||||||
let mut ids: HashSet<String> = HashSet::new();
|
|
||||||
|
|
||||||
for route in &gtfs.routes {
|
|
||||||
let route_number = route.1.short_name.as_ref();
|
|
||||||
if route_number.is_some() && route_numbers.contains(route_number.unwrap().as_str()) {
|
|
||||||
ids.insert(route.0.clone());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return ids;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<String>) -> Gtfs {
|
|
||||||
// Open zip file
|
|
||||||
let mut zip_reader = zip::ZipArchive::new(File::open(src_file).unwrap()).unwrap();
|
|
||||||
|
|
||||||
let mut gtfs: Gtfs = Gtfs {
|
|
||||||
agencies: Vec::new(),
|
|
||||||
calendar: HashMap::new(),
|
|
||||||
calendar_dates: HashMap::new(),
|
|
||||||
routes: HashMap::new(),
|
|
||||||
stops: HashMap::new(),
|
|
||||||
trips: HashMap::new(),
|
|
||||||
stop_times: HashMap::new(),
|
|
||||||
};
|
|
||||||
|
|
||||||
// Agencies
|
// Agencies
|
||||||
load_vector(&mut gtfs.agencies, &mut zip_reader, "agency.txt");
|
load_vector(&mut gtfs.agencies, zip_reader, "agency.txt");
|
||||||
|
|
||||||
// Calendars
|
// Calendars
|
||||||
load_map(
|
load_map(
|
||||||
&mut gtfs.calendar,
|
&mut gtfs.calendar,
|
||||||
&mut zip_reader,
|
zip_reader,
|
||||||
"calendar.txt",
|
"calendar.txt",
|
||||||
|c: &Calendar| String::from(&c.id),
|
|c: &Calendar| String::from(&c.id),
|
||||||
LoadAll {},
|
LoadAll {},
|
||||||
|
|
@ -199,7 +146,7 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
|
||||||
// Calendar Dates
|
// Calendar Dates
|
||||||
load_vector_map(
|
load_vector_map(
|
||||||
&mut gtfs.calendar_dates,
|
&mut gtfs.calendar_dates,
|
||||||
&mut zip_reader,
|
zip_reader,
|
||||||
"calendar_dates.txt",
|
"calendar_dates.txt",
|
||||||
|d: &CalendarDate| String::from(&d.service_id),
|
|d: &CalendarDate| String::from(&d.service_id),
|
||||||
LoadAll {},
|
LoadAll {},
|
||||||
|
|
@ -208,7 +155,7 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
|
||||||
// Routes
|
// Routes
|
||||||
load_map(
|
load_map(
|
||||||
&mut gtfs.routes,
|
&mut gtfs.routes,
|
||||||
&mut zip_reader,
|
zip_reader,
|
||||||
"routes.txt",
|
"routes.txt",
|
||||||
|r: &Route| String::from(&r.id),
|
|r: &Route| String::from(&r.id),
|
||||||
LoadRoutes {
|
LoadRoutes {
|
||||||
|
|
@ -219,7 +166,7 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
|
||||||
// Stops
|
// Stops
|
||||||
load_map(
|
load_map(
|
||||||
&mut gtfs.stops,
|
&mut gtfs.stops,
|
||||||
&mut zip_reader,
|
zip_reader,
|
||||||
"stops.txt",
|
"stops.txt",
|
||||||
|s: &Stop| String::from(&s.id),
|
|s: &Stop| String::from(&s.id),
|
||||||
LoadStops { stops: &stop_codes },
|
LoadStops { stops: &stop_codes },
|
||||||
|
|
@ -229,7 +176,7 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
|
||||||
// Trips
|
// Trips
|
||||||
load_map(
|
load_map(
|
||||||
&mut gtfs.trips,
|
&mut gtfs.trips,
|
||||||
&mut zip_reader,
|
zip_reader,
|
||||||
"trips.txt",
|
"trips.txt",
|
||||||
|t: &RawTrip| String::from(&t.id),
|
|t: &RawTrip| String::from(&t.id),
|
||||||
LoadTrips {
|
LoadTrips {
|
||||||
|
|
@ -242,7 +189,7 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
|
||||||
let trip_ids = HashSet::<String>::from_iter(gtfs.trips.keys().cloned());
|
let trip_ids = HashSet::<String>::from_iter(gtfs.trips.keys().cloned());
|
||||||
load_map(
|
load_map(
|
||||||
&mut gtfs.stop_times,
|
&mut gtfs.stop_times,
|
||||||
&mut zip_reader,
|
zip_reader,
|
||||||
"stop_times.txt",
|
"stop_times.txt",
|
||||||
|st: &RawStopTime| (st.trip_id.clone(), st.stop_sequence),
|
|st: &RawStopTime| (st.trip_id.clone(), st.stop_sequence),
|
||||||
LoadStopTimes {
|
LoadStopTimes {
|
||||||
|
|
@ -250,6 +197,4 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
|
||||||
stop_ids: &stop_ids,
|
stop_ids: &stop_ids,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
return gtfs;
|
|
||||||
}
|
}
|
||||||
|
|
@ -0,0 +1,30 @@
|
||||||
|
mod loader;
|
||||||
|
mod utils;
|
||||||
|
mod structs;
|
||||||
|
use std::{
|
||||||
|
collections::{HashMap, HashSet},
|
||||||
|
fs::File,
|
||||||
|
};
|
||||||
|
use crate::gtfs::{loader::load_gtfs, structs::Gtfs};
|
||||||
|
|
||||||
|
|
||||||
|
/// Load a GTFS structure from a zip file
|
||||||
|
pub fn load(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<String>) -> Gtfs {
|
||||||
|
// Open zip file
|
||||||
|
let mut zip_reader = zip::ZipArchive::new(File::open(src_file).unwrap()).unwrap();
|
||||||
|
|
||||||
|
let mut gtfs: Gtfs = Gtfs {
|
||||||
|
agencies: Vec::new(),
|
||||||
|
calendar: HashMap::new(),
|
||||||
|
calendar_dates: HashMap::new(),
|
||||||
|
routes: HashMap::new(),
|
||||||
|
stops: HashMap::new(),
|
||||||
|
trips: HashMap::new(),
|
||||||
|
stop_times: HashMap::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
load_gtfs(&mut gtfs, &mut zip_reader, route_numbers, stop_codes);
|
||||||
|
|
||||||
|
return gtfs;
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,22 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use gtfs_structures::{Agency, Calendar, CalendarDate, RawStopTime, RawTrip, Route, Stop};
|
||||||
|
|
||||||
|
// The main GTFS struct. This is similar to (but not exactly) gtfs-structures::Gtfs because we don't need everything
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Gtfs {
|
||||||
|
/// All agencies. They can not be read by `agency_id`, as it is not a required field
|
||||||
|
pub agencies: Vec<Agency>,
|
||||||
|
/// All Calendar by `service_id`
|
||||||
|
pub calendar: HashMap<String, Calendar>,
|
||||||
|
/// All calendar dates grouped by service_id
|
||||||
|
pub calendar_dates: HashMap<String, Vec<CalendarDate>>,
|
||||||
|
/// All routes by `route_id`
|
||||||
|
pub routes: HashMap<String, Route>,
|
||||||
|
/// All stop by `stop_id`.
|
||||||
|
pub stops: HashMap<String, Stop>,
|
||||||
|
/// All trips by trip_id
|
||||||
|
pub trips: HashMap<String, RawTrip>,
|
||||||
|
/// Stop times for the chosen stops and the chosen routes
|
||||||
|
pub stop_times: HashMap<(String, u32), RawStopTime>,
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,27 @@
|
||||||
|
use std::collections::HashSet;
|
||||||
|
use crate::gtfs::structs::Gtfs;
|
||||||
|
|
||||||
|
|
||||||
|
pub fn stop_ids_from_codes(gtfs: &Gtfs, stop_codes: &HashSet<String>) -> HashSet<String> {
|
||||||
|
let mut ids: HashSet<String> = HashSet::new();
|
||||||
|
|
||||||
|
for stop in >fs.stops {
|
||||||
|
let stop_number = stop.1.code.as_ref();
|
||||||
|
if stop_number.is_some() && stop_codes.contains(stop_number.unwrap().as_str()) {
|
||||||
|
ids.insert(stop.0.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ids;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn route_ids_from_numbers(gtfs: &Gtfs, route_numbers: &HashSet<String>) -> HashSet<String> {
|
||||||
|
let mut ids: HashSet<String> = HashSet::new();
|
||||||
|
|
||||||
|
for route in >fs.routes {
|
||||||
|
let route_number = route.1.short_name.as_ref();
|
||||||
|
if route_number.is_some() && route_numbers.contains(route_number.unwrap().as_str()) {
|
||||||
|
ids.insert(route.0.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ids;
|
||||||
|
}
|
||||||
|
|
@ -1,6 +1,5 @@
|
||||||
use std::{collections::HashSet, time::Instant};
|
|
||||||
|
|
||||||
mod gtfs;
|
mod gtfs;
|
||||||
|
use std::{collections::HashSet, time::Instant};
|
||||||
|
|
||||||
const SRC_FILE: &str = "/home/nahuel/Downloads/GTFS_Realtime.zip";
|
const SRC_FILE: &str = "/home/nahuel/Downloads/GTFS_Realtime.zip";
|
||||||
|
|
||||||
|
|
@ -9,7 +8,7 @@ fn main() {
|
||||||
for _ in 0..1000 {
|
for _ in 0..1000 {
|
||||||
let start_gtfs = Instant::now();
|
let start_gtfs = Instant::now();
|
||||||
println!("Loading GTFS data...");
|
println!("Loading GTFS data...");
|
||||||
let gtfs = gtfs::init(SRC_FILE,
|
let gtfs = gtfs::load(SRC_FILE,
|
||||||
HashSet::from([String::from("15A"), String::from("F1"), String::from("F2"), String::from("F3")]),
|
HashSet::from([String::from("15A"), String::from("F1"), String::from("F2"), String::from("F3")]),
|
||||||
HashSet::from([String::from("1117")]));
|
HashSet::from([String::from("1117")]));
|
||||||
println!("Loaded records in {:#?}. Data size: {:#?}", start_gtfs.elapsed(), ::std::mem::size_of_val(&gtfs))
|
println!("Loaded records in {:#?}. Data size: {:#?}", start_gtfs.elapsed(), ::std::mem::size_of_val(&gtfs))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue