Rearranged the existing code to make it more manageable.

This commit is contained in:
Nahuel Lofeudo 2026-03-28 09:17:49 +00:00
parent 774d4e4c80
commit a22e1ee3fa
5 changed files with 105 additions and 82 deletions

View File

@ -1,32 +1,9 @@
use gtfs_structures::{Agency, Calendar, CalendarDate, RawStopTime, RawTrip, Route, Stop}; use std::{collections::{HashMap, HashSet}, fs::File, hash::Hash};
use serde::{self, de::DeserializeOwned}; use gtfs_structures::{Calendar, CalendarDate, RawStopTime, RawTrip, Route, Stop};
use std::{ use serde::de::DeserializeOwned;
collections::{HashMap, HashSet},
fs::File,
hash::Hash,
};
use zip::ZipArchive; use zip::ZipArchive;
use crate::gtfs::{structs::Gtfs, utils::{route_ids_from_numbers, stop_ids_from_codes}};
// The main GTFS struct. This is similar to (but not exactly) gtfs-structures::Gtfs because we don't need everything
#[derive(Debug)]
pub struct Gtfs {
/// All agencies. They can not be read by `agency_id`, as it is not a required field
pub agencies: Vec<Agency>,
/// All Calendar by `service_id`
pub calendar: HashMap<String, Calendar>,
/// All calendar dates grouped by service_id
pub calendar_dates: HashMap<String, Vec<CalendarDate>>,
/// All routes by `route_id`
pub routes: HashMap<String, Route>,
/// All stop by `stop_id`.
pub stops: HashMap<String, Stop>,
/// All trips by trip_id
pub trips: HashMap<String, RawTrip>,
/// Stop times for the chosen stops and the chosen routes
pub stop_times: HashMap<(String, u32), RawStopTime>,
}
trait Filter<T> { trait Filter<T> {
fn accept(&self, v: &T) -> bool; fn accept(&self, v: &T) -> bool;
@ -127,70 +104,40 @@ fn load_map<K, V, IndexFn, FilterT>(
// Loads a HashMap of a vector of the selected type, using the provided index function as the key // Loads a HashMap of a vector of the selected type, using the provided index function as the key
// And a predicate as a filter // And a predicate as a filter
fn load_vector_map<'a, V: DeserializeOwned + Clone>( fn load_vector_map<'a, K, V, IndexFn, FilterT>(
destination: &mut HashMap<String, Vec<V>>, destination: &mut HashMap<K, Vec<V>>,
zip_reader: &mut ZipArchive<File>, zip_reader: &mut ZipArchive<File>,
table_name: &str, table_name: &str,
index: fn(&V) -> String, index: IndexFn,
filter: impl Filter<V>, filter: FilterT,
) { ) where
K: Eq + Hash,
V: DeserializeOwned,
IndexFn: Fn(&V) -> K,
FilterT: Filter<V>,
{
let file_reader = zip_reader.by_name(table_name).unwrap(); let file_reader = zip_reader.by_name(table_name).unwrap();
let mut rdr = csv::Reader::from_reader(file_reader); let mut rdr = csv::Reader::from_reader(file_reader);
for row in rdr.deserialize() { for row in rdr.deserialize() {
let record: V = row.unwrap(); let record: V = row.unwrap();
if filter.accept(&record) { if filter.accept(&record) {
let idx: String = index(&record); let idx = index(&record);
destination.entry(idx).or_insert_with(Vec::new).push(record); destination.entry(idx).or_insert_with(Vec::new).push(record);
} }
} }
} }
fn stop_ids_from_codes(gtfs: &Gtfs, stop_codes: &HashSet<String>) -> HashSet<String> {
let mut ids: HashSet<String> = HashSet::new();
for stop in &gtfs.stops { pub fn load_gtfs(gtfs: &mut Gtfs, zip_reader: &mut ZipArchive<File>, route_numbers: HashSet<String>, stop_codes: HashSet<String>) {
let stop_number = stop.1.code.as_ref();
if stop_number.is_some() && stop_codes.contains(stop_number.unwrap().as_str()) {
ids.insert(stop.0.clone());
}
}
return ids;
}
fn route_ids_from_numbers(gtfs: &Gtfs, route_numbers: &HashSet<String>) -> HashSet<String> {
let mut ids: HashSet<String> = HashSet::new();
for route in &gtfs.routes {
let route_number = route.1.short_name.as_ref();
if route_number.is_some() && route_numbers.contains(route_number.unwrap().as_str()) {
ids.insert(route.0.clone());
}
}
return ids;
}
pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<String>) -> Gtfs {
// Open zip file
let mut zip_reader = zip::ZipArchive::new(File::open(src_file).unwrap()).unwrap();
let mut gtfs: Gtfs = Gtfs {
agencies: Vec::new(),
calendar: HashMap::new(),
calendar_dates: HashMap::new(),
routes: HashMap::new(),
stops: HashMap::new(),
trips: HashMap::new(),
stop_times: HashMap::new(),
};
// Agencies // Agencies
load_vector(&mut gtfs.agencies, &mut zip_reader, "agency.txt"); load_vector(&mut gtfs.agencies, zip_reader, "agency.txt");
// Calendars // Calendars
load_map( load_map(
&mut gtfs.calendar, &mut gtfs.calendar,
&mut zip_reader, zip_reader,
"calendar.txt", "calendar.txt",
|c: &Calendar| String::from(&c.id), |c: &Calendar| String::from(&c.id),
LoadAll {}, LoadAll {},
@ -199,7 +146,7 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
// Calendar Dates // Calendar Dates
load_vector_map( load_vector_map(
&mut gtfs.calendar_dates, &mut gtfs.calendar_dates,
&mut zip_reader, zip_reader,
"calendar_dates.txt", "calendar_dates.txt",
|d: &CalendarDate| String::from(&d.service_id), |d: &CalendarDate| String::from(&d.service_id),
LoadAll {}, LoadAll {},
@ -208,7 +155,7 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
// Routes // Routes
load_map( load_map(
&mut gtfs.routes, &mut gtfs.routes,
&mut zip_reader, zip_reader,
"routes.txt", "routes.txt",
|r: &Route| String::from(&r.id), |r: &Route| String::from(&r.id),
LoadRoutes { LoadRoutes {
@ -219,7 +166,7 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
// Stops // Stops
load_map( load_map(
&mut gtfs.stops, &mut gtfs.stops,
&mut zip_reader, zip_reader,
"stops.txt", "stops.txt",
|s: &Stop| String::from(&s.id), |s: &Stop| String::from(&s.id),
LoadStops { stops: &stop_codes }, LoadStops { stops: &stop_codes },
@ -229,7 +176,7 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
// Trips // Trips
load_map( load_map(
&mut gtfs.trips, &mut gtfs.trips,
&mut zip_reader, zip_reader,
"trips.txt", "trips.txt",
|t: &RawTrip| String::from(&t.id), |t: &RawTrip| String::from(&t.id),
LoadTrips { LoadTrips {
@ -242,7 +189,7 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
let trip_ids = HashSet::<String>::from_iter(gtfs.trips.keys().cloned()); let trip_ids = HashSet::<String>::from_iter(gtfs.trips.keys().cloned());
load_map( load_map(
&mut gtfs.stop_times, &mut gtfs.stop_times,
&mut zip_reader, zip_reader,
"stop_times.txt", "stop_times.txt",
|st: &RawStopTime| (st.trip_id.clone(), st.stop_sequence), |st: &RawStopTime| (st.trip_id.clone(), st.stop_sequence),
LoadStopTimes { LoadStopTimes {
@ -250,6 +197,4 @@ pub fn init(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<
stop_ids: &stop_ids, stop_ids: &stop_ids,
}, },
); );
return gtfs;
} }

30
src/gtfs/mod.rs Normal file
View File

@ -0,0 +1,30 @@
mod loader;
mod utils;
mod structs;
use std::{
collections::{HashMap, HashSet},
fs::File,
};
use crate::gtfs::{loader::load_gtfs, structs::Gtfs};
/// Loads a [`Gtfs`] structure from the GTFS zip archive at `src_file`.
///
/// Opens the archive, builds an empty [`Gtfs`] and passes both to `load_gtfs`
/// together with `route_numbers` and `stop_codes`, then returns the populated value.
///
/// # Panics
///
/// Panics if `src_file` cannot be opened or cannot be read as a zip archive.
/// Reading the individual tables inside `load_gtfs` may panic as well.
pub fn load(src_file: &str, route_numbers: HashSet<String>, stop_codes: HashSet<String>) -> Gtfs {
    // The signature has no error channel, so failures still panic, but the message
    // now names the offending path and the underlying error.
    let archive_file = File::open(src_file)
        .unwrap_or_else(|err| panic!("failed to open GTFS archive {}: {}", src_file, err));
    let mut zip_reader = zip::ZipArchive::new(archive_file)
        .unwrap_or_else(|err| panic!("failed to read {} as a zip archive: {}", src_file, err));

    let mut gtfs = Gtfs {
        agencies: Vec::new(),
        calendar: HashMap::new(),
        calendar_dates: HashMap::new(),
        routes: HashMap::new(),
        stops: HashMap::new(),
        trips: HashMap::new(),
        stop_times: HashMap::new(),
    };
    load_gtfs(&mut gtfs, &mut zip_reader, route_numbers, stop_codes);
    gtfs
}

22
src/gtfs/structs.rs Normal file
View File

@ -0,0 +1,22 @@
use std::collections::HashMap;
use gtfs_structures::{Agency, Calendar, CalendarDate, RawStopTime, RawTrip, Route, Stop};
/// The main GTFS struct. It is similar to (but not the same as) `gtfs_structures::Gtfs`,
/// because we don't need everything that type holds.
#[derive(Debug)]
pub struct Gtfs {
/// All agencies. They cannot be looked up by `agency_id`, as it is not a required field.
pub agencies: Vec<Agency>,
/// All calendars, keyed by `service_id`.
pub calendar: HashMap<String, Calendar>,
/// All calendar dates, grouped by `service_id`.
pub calendar_dates: HashMap<String, Vec<CalendarDate>>,
/// All routes, keyed by `route_id`.
pub routes: HashMap<String, Route>,
/// All stops, keyed by `stop_id`.
pub stops: HashMap<String, Stop>,
/// All trips, keyed by `trip_id`.
pub trips: HashMap<String, RawTrip>,
/// Stop times for the chosen stops and the chosen routes,
/// keyed by the pair (`trip_id`, `stop_sequence`).
pub stop_times: HashMap<(String, u32), RawStopTime>,
}

27
src/gtfs/utils.rs Normal file
View File

@ -0,0 +1,27 @@
use std::collections::HashSet;
use crate::gtfs::structs::Gtfs;
/// Returns the ids (the keys of `gtfs.stops`) of every stop whose code is in `stop_codes`.
///
/// Stops that have no code never match. The result is empty when nothing matches.
pub fn stop_ids_from_codes(gtfs: &Gtfs, stop_codes: &HashSet<String>) -> HashSet<String> {
    gtfs.stops
        .iter()
        .filter(|(_, stop)| {
            // A missing code maps to `false`, so the stop is skipped without unwrapping.
            stop.code
                .as_ref()
                .map_or(false, |code| stop_codes.contains(code.as_str()))
        })
        .map(|(stop_id, _)| stop_id.clone())
        .collect()
}
/// Returns the ids (the keys of `gtfs.routes`) of every route whose short name is in
/// `route_numbers`.
///
/// Routes that have no short name never match. The result is empty when nothing matches.
pub fn route_ids_from_numbers(gtfs: &Gtfs, route_numbers: &HashSet<String>) -> HashSet<String> {
    gtfs.routes
        .iter()
        .filter(|(_, route)| {
            // A missing short name maps to `false`, so the route is skipped without unwrapping.
            route
                .short_name
                .as_ref()
                .map_or(false, |number| route_numbers.contains(number.as_str()))
        })
        .map(|(route_id, _)| route_id.clone())
        .collect()
}

View File

@ -1,6 +1,5 @@
use std::{collections::HashSet, time::Instant};
mod gtfs; mod gtfs;
use std::{collections::HashSet, time::Instant};
const SRC_FILE: &str = "/home/nahuel/Downloads/GTFS_Realtime.zip"; const SRC_FILE: &str = "/home/nahuel/Downloads/GTFS_Realtime.zip";
@ -9,9 +8,9 @@ fn main() {
for _ in 0..1000 { for _ in 0..1000 {
let start_gtfs = Instant::now(); let start_gtfs = Instant::now();
println!("Loading GTFS data..."); println!("Loading GTFS data...");
let gtfs = gtfs::init(SRC_FILE, let gtfs = gtfs::load(SRC_FILE,
HashSet::from([String::from("15A"), String::from("F1"), String::from("F2"), String::from("F3")]), HashSet::from([String::from("15A"), String::from("F1"), String::from("F2"), String::from("F3")]),
HashSet::from([String::from("1117")])); HashSet::from([String::from("1117")]));
println!("Loaded records in {:#?}. Data size: {:#?}", start_gtfs.elapsed(), ::std::mem::size_of_val(&gtfs)) println!("Loaded records in {:#?}. Data size: {:#?}", start_gtfs.elapsed(), ::std::mem::size_of_val(&gtfs))
} }