From a22e1ee3fa440452e89e62d1b85b42d83f527225 Mon Sep 17 00:00:00 2001 From: Nahuel Lofeudo Date: Sat, 28 Mar 2026 09:17:49 +0000 Subject: [PATCH] Rearranged the existing code to make it more manageable. --- src/{gtfs.rs => gtfs/loader.rs} | 101 ++++++++------------------------ src/gtfs/mod.rs | 30 ++++++++++ src/gtfs/structs.rs | 22 +++++++ src/gtfs/utils.rs | 27 +++++++++ src/main.rs | 7 +-- 5 files changed, 105 insertions(+), 82 deletions(-) rename src/{gtfs.rs => gtfs/loader.rs} (62%) create mode 100644 src/gtfs/mod.rs create mode 100644 src/gtfs/structs.rs create mode 100644 src/gtfs/utils.rs diff --git a/src/gtfs.rs b/src/gtfs/loader.rs similarity index 62% rename from src/gtfs.rs rename to src/gtfs/loader.rs index 47b0963..159baaa 100644 --- a/src/gtfs.rs +++ b/src/gtfs/loader.rs @@ -1,32 +1,9 @@ -use gtfs_structures::{Agency, Calendar, CalendarDate, RawStopTime, RawTrip, Route, Stop}; -use serde::{self, de::DeserializeOwned}; -use std::{ - collections::{HashMap, HashSet}, - fs::File, - hash::Hash, -}; - +use std::{collections::{HashMap, HashSet}, fs::File, hash::Hash}; +use gtfs_structures::{Calendar, CalendarDate, RawStopTime, RawTrip, Route, Stop}; +use serde::de::DeserializeOwned; use zip::ZipArchive; - -// The main GTFS struct. This is similar to (but not exactly) gtfs-structures::Gtfs because we don't need everything -#[derive(Debug)] -pub struct Gtfs { - /// All agencies. They can not be read by `agency_id`, as it is not a required field - pub agencies: Vec, - /// All Calendar by `service_id` - pub calendar: HashMap, - /// All calendar dates grouped by service_id - pub calendar_dates: HashMap>, - /// All routes by `route_id` - pub routes: HashMap, - /// All stop by `stop_id`. - pub stops: HashMap, - /// All trips by trip_id - pub trips: HashMap, - /// Stop times for the chosen stops and the chosen routes - pub stop_times: HashMap<(String, u32), RawStopTime>, -} +use crate::gtfs::{structs::Gtfs, utils::{route_ids_from_numbers, stop_ids_from_codes}}; trait Filter { fn accept(&self, v: &T) -> bool; @@ -127,70 +104,40 @@ fn load_map( // Loads a HashMap of a vector of the selected type, using the provided index function as the key // And a predicate as a filter -fn load_vector_map<'a, V: DeserializeOwned + Clone>( - destination: &mut HashMap>, +fn load_vector_map<'a, K, V, IndexFn, FilterT>( + destination: &mut HashMap>, zip_reader: &mut ZipArchive, table_name: &str, - index: fn(&V) -> String, - filter: impl Filter, -) { + index: IndexFn, + filter: FilterT, +) where + K: Eq + Hash, + V: DeserializeOwned, + IndexFn: Fn(&V) -> K, + FilterT: Filter, +{ let file_reader = zip_reader.by_name(table_name).unwrap(); let mut rdr = csv::Reader::from_reader(file_reader); for row in rdr.deserialize() { let record: V = row.unwrap(); if filter.accept(&record) { - let idx: String = index(&record); + let idx = index(&record); destination.entry(idx).or_insert_with(Vec::new).push(record); } } } -fn stop_ids_from_codes(gtfs: &Gtfs, stop_codes: &HashSet) -> HashSet { - let mut ids: HashSet = HashSet::new(); - for stop in >fs.stops { - let stop_number = stop.1.code.as_ref(); - if stop_number.is_some() && stop_codes.contains(stop_number.unwrap().as_str()) { - ids.insert(stop.0.clone()); - } - } - return ids; -} - -fn route_ids_from_numbers(gtfs: &Gtfs, route_numbers: &HashSet) -> HashSet { - let mut ids: HashSet = HashSet::new(); - - for route in >fs.routes { - let route_number = route.1.short_name.as_ref(); - if route_number.is_some() && route_numbers.contains(route_number.unwrap().as_str()) { - ids.insert(route.0.clone()); - } - } - return ids; -} - -pub fn init(src_file: &str, route_numbers: HashSet, stop_codes: HashSet) -> Gtfs { - // Open zip file - let mut zip_reader = zip::ZipArchive::new(File::open(src_file).unwrap()).unwrap(); - - let mut gtfs: Gtfs = Gtfs { - agencies: Vec::new(), - calendar: HashMap::new(), - calendar_dates: HashMap::new(), - routes: HashMap::new(), - stops: HashMap::new(), - trips: HashMap::new(), - stop_times: HashMap::new(), - }; +pub fn load_gtfs(gtfs: &mut Gtfs, zip_reader: &mut ZipArchive, route_numbers: HashSet, stop_codes: HashSet) { // Agencies - load_vector(&mut gtfs.agencies, &mut zip_reader, "agency.txt"); + load_vector(&mut gtfs.agencies, zip_reader, "agency.txt"); // Calendars load_map( &mut gtfs.calendar, - &mut zip_reader, + zip_reader, "calendar.txt", |c: &Calendar| String::from(&c.id), LoadAll {}, @@ -199,7 +146,7 @@ pub fn init(src_file: &str, route_numbers: HashSet, stop_codes: HashSet< // Calendar Dates load_vector_map( &mut gtfs.calendar_dates, - &mut zip_reader, + zip_reader, "calendar_dates.txt", |d: &CalendarDate| String::from(&d.service_id), LoadAll {}, @@ -208,7 +155,7 @@ pub fn init(src_file: &str, route_numbers: HashSet, stop_codes: HashSet< // Routes load_map( &mut gtfs.routes, - &mut zip_reader, + zip_reader, "routes.txt", |r: &Route| String::from(&r.id), LoadRoutes { @@ -219,7 +166,7 @@ pub fn init(src_file: &str, route_numbers: HashSet, stop_codes: HashSet< // Stops load_map( &mut gtfs.stops, - &mut zip_reader, + zip_reader, "stops.txt", |s: &Stop| String::from(&s.id), LoadStops { stops: &stop_codes }, @@ -229,7 +176,7 @@ pub fn init(src_file: &str, route_numbers: HashSet, stop_codes: HashSet< // Trips load_map( &mut gtfs.trips, - &mut zip_reader, + zip_reader, "trips.txt", |t: &RawTrip| String::from(&t.id), LoadTrips { @@ -242,7 +189,7 @@ pub fn init(src_file: &str, route_numbers: HashSet, stop_codes: HashSet< let trip_ids = HashSet::::from_iter(gtfs.trips.keys().cloned()); load_map( &mut gtfs.stop_times, - &mut zip_reader, + zip_reader, "stop_times.txt", |st: &RawStopTime| (st.trip_id.clone(), st.stop_sequence), LoadStopTimes { @@ -250,6 +197,4 @@ pub fn init(src_file: &str, route_numbers: HashSet, stop_codes: HashSet< stop_ids: &stop_ids, }, ); - - return gtfs; } diff --git a/src/gtfs/mod.rs b/src/gtfs/mod.rs new file mode 100644 index 0000000..7256671 --- /dev/null +++ b/src/gtfs/mod.rs @@ -0,0 +1,30 @@ +mod loader; +mod utils; +mod structs; +use std::{ + collections::{HashMap, HashSet}, + fs::File, +}; +use crate::gtfs::{loader::load_gtfs, structs::Gtfs}; + + +/// Load a GTFS structure from a zip file +pub fn load(src_file: &str, route_numbers: HashSet, stop_codes: HashSet) -> Gtfs { + // Open zip file + let mut zip_reader = zip::ZipArchive::new(File::open(src_file).unwrap()).unwrap(); + + let mut gtfs: Gtfs = Gtfs { + agencies: Vec::new(), + calendar: HashMap::new(), + calendar_dates: HashMap::new(), + routes: HashMap::new(), + stops: HashMap::new(), + trips: HashMap::new(), + stop_times: HashMap::new(), + }; + + + load_gtfs(&mut gtfs, &mut zip_reader, route_numbers, stop_codes); + + return gtfs; +} \ No newline at end of file diff --git a/src/gtfs/structs.rs b/src/gtfs/structs.rs new file mode 100644 index 0000000..702d3c0 --- /dev/null +++ b/src/gtfs/structs.rs @@ -0,0 +1,22 @@ +use std::collections::HashMap; + +use gtfs_structures::{Agency, Calendar, CalendarDate, RawStopTime, RawTrip, Route, Stop}; + +// The main GTFS struct. This is similar to (but not exactly) gtfs-structures::Gtfs because we don't need everything +#[derive(Debug)] +pub struct Gtfs { + /// All agencies. They can not be read by `agency_id`, as it is not a required field + pub agencies: Vec, + /// All Calendar by `service_id` + pub calendar: HashMap, + /// All calendar dates grouped by service_id + pub calendar_dates: HashMap>, + /// All routes by `route_id` + pub routes: HashMap, + /// All stop by `stop_id`. + pub stops: HashMap, + /// All trips by trip_id + pub trips: HashMap, + /// Stop times for the chosen stops and the chosen routes + pub stop_times: HashMap<(String, u32), RawStopTime>, +} diff --git a/src/gtfs/utils.rs b/src/gtfs/utils.rs new file mode 100644 index 0000000..22d86dd --- /dev/null +++ b/src/gtfs/utils.rs @@ -0,0 +1,27 @@ +use std::collections::HashSet; +use crate::gtfs::structs::Gtfs; + + +pub fn stop_ids_from_codes(gtfs: &Gtfs, stop_codes: &HashSet) -> HashSet { + let mut ids: HashSet = HashSet::new(); + + for stop in >fs.stops { + let stop_number = stop.1.code.as_ref(); + if stop_number.is_some() && stop_codes.contains(stop_number.unwrap().as_str()) { + ids.insert(stop.0.clone()); + } + } + return ids; +} + +pub fn route_ids_from_numbers(gtfs: &Gtfs, route_numbers: &HashSet) -> HashSet { + let mut ids: HashSet = HashSet::new(); + + for route in >fs.routes { + let route_number = route.1.short_name.as_ref(); + if route_number.is_some() && route_numbers.contains(route_number.unwrap().as_str()) { + ids.insert(route.0.clone()); + } + } + return ids; +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 1a13ce0..404d07d 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,5 @@ -use std::{collections::HashSet, time::Instant}; - mod gtfs; +use std::{collections::HashSet, time::Instant}; const SRC_FILE: &str = "/home/nahuel/Downloads/GTFS_Realtime.zip"; @@ -9,9 +8,9 @@ fn main() { for _ in 0..1000 { let start_gtfs = Instant::now(); println!("Loading GTFS data..."); - let gtfs = gtfs::init(SRC_FILE, + let gtfs = gtfs::load(SRC_FILE, HashSet::from([String::from("15A"), String::from("F1"), String::from("F2"), String::from("F3")]), - HashSet::from([String::from("1117")])); + HashSet::from([String::from("1117")])); println!("Loaded records in {:#?}. Data size: {:#?}", start_gtfs.elapsed(), ::std::mem::size_of_val(>fs)) }