Skip to content

Commit

Permalink
Add cache for value objects
Browse files Browse the repository at this point in the history
- Example with GTFS of Switzerland.
- Add cache for value objects to reduce the schedule size.
  • Loading branch information
munterfi committed Apr 14, 2024
1 parent e2fd4da commit f0cc196
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 63 deletions.
98 changes: 98 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/GtfsScheduleParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package ch.naviqore.gtfs.schedule;

import ch.naviqore.gtfs.schedule.model.GtfsScheduleBuilder;
import ch.naviqore.gtfs.schedule.type.ExceptionType;
import ch.naviqore.gtfs.schedule.type.RouteType;
import ch.naviqore.gtfs.schedule.type.ServiceDayTime;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.csv.CSVRecord;

import java.time.DayOfWeek;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;

/**
 * GTFS CSV records parser.
 * <p>
 * Each {@code parse...} method reads the named columns of every CSV record of one GTFS file and forwards the values to
 * the {@link GtfsScheduleBuilder} supplied at construction time. A {@code null} record list (the file was not part of
 * the feed) is skipped with a warning instead of failing with a {@link NullPointerException}; any inconsistency this
 * causes is left to the builder to report.
 *
 * @author munterfi
 */
@RequiredArgsConstructor
@Log4j2
class GtfsScheduleParser {

    /** GTFS dates are written as {@code yyyyMMdd}, e.g. {@code 20240414}. */
    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd");

    /** Maps the weekday column names of the calendar file to their {@link DayOfWeek}. */
    private static final Map<String, DayOfWeek> DAY_MAPPINGS = Map.of("monday", DayOfWeek.MONDAY, "tuesday",
            DayOfWeek.TUESDAY, "wednesday", DayOfWeek.WEDNESDAY, "thursday", DayOfWeek.THURSDAY, "friday",
            DayOfWeek.FRIDAY, "saturday", DayOfWeek.SATURDAY, "sunday", DayOfWeek.SUNDAY);

    private final GtfsScheduleBuilder builder;

    /**
     * Adds one agency per record (columns {@code agency_id}, {@code agency_name}, {@code agency_url},
     * {@code agency_timezone}).
     *
     * @param records agency records; {@code null} is skipped with a warning
     */
    void parseAgencies(List<CSVRecord> records) {
        if (isMissing(records, "agency")) {
            return;
        }
        log.info("Parsing {} agency records", records.size());
        for (CSVRecord row : records) {
            builder.addAgency(row.get("agency_id"), row.get("agency_name"), row.get("agency_url"),
                    row.get("agency_timezone"));
        }
    }

    /**
     * Adds one calendar per record. A weekday belongs to the service days when its column holds exactly {@code "1"}.
     *
     * @param records calendar records; {@code null} is skipped with a warning
     */
    void parseCalendars(List<CSVRecord> records) {
        if (isMissing(records, "calendar")) {
            return;
        }
        log.info("Parsing {} calendar records", records.size());
        for (CSVRecord row : records) {
            EnumSet<DayOfWeek> serviceDays = EnumSet.noneOf(DayOfWeek.class);
            for (Map.Entry<String, DayOfWeek> mapping : DAY_MAPPINGS.entrySet()) {
                if ("1".equals(row.get(mapping.getKey()))) {
                    serviceDays.add(mapping.getValue());
                }
            }
            builder.addCalendar(row.get("service_id"), serviceDays, parseDate(row.get("start_date")),
                    parseDate(row.get("end_date")));
        }
    }

    /**
     * Adds one calendar date (service exception) per record.
     *
     * @param records calendar date records; {@code null} is skipped with a warning
     */
    void parseCalendarDates(List<CSVRecord> records) {
        if (isMissing(records, "calendar date")) {
            return;
        }
        log.info("Parsing {} calendar date records", records.size());
        for (CSVRecord row : records) {
            builder.addCalendarDate(row.get("service_id"), parseDate(row.get("date")),
                    ExceptionType.parse(row.get("exception_type")));
        }
    }

    /**
     * Adds one stop per record; {@code stop_lat} and {@code stop_lon} are parsed as doubles.
     *
     * @param records stop records; {@code null} is skipped with a warning
     * @throws NumberFormatException if a coordinate column does not hold a parsable double
     */
    void parseStops(List<CSVRecord> records) {
        if (isMissing(records, "stop")) {
            return;
        }
        log.info("Parsing {} stop records", records.size());
        for (CSVRecord row : records) {
            builder.addStop(row.get("stop_id"), row.get("stop_name"), Double.parseDouble(row.get("stop_lat")),
                    Double.parseDouble(row.get("stop_lon")));
        }
    }

    /**
     * Adds one route per record. The {@code route_type} column is currently ignored and every route is registered as
     * {@link RouteType#RAIL} (see TODO below).
     *
     * @param records route records; {@code null} is skipped with a warning
     */
    void parseRoutes(List<CSVRecord> records) {
        if (isMissing(records, "route")) {
            return;
        }
        log.info("Parsing {} route records", records.size());
        for (CSVRecord row : records) {
            // TODO: Route types are not standardized in any way.
            //  RouteType.parse(record.get("route_type"))
            builder.addRoute(row.get("route_id"), row.get("agency_id"), row.get("route_short_name"),
                    row.get("route_long_name"), RouteType.RAIL);
        }
    }

    /**
     * Adds one trip per record (columns {@code trip_id}, {@code route_id}, {@code service_id}).
     *
     * @param records trip records; {@code null} is skipped with a warning
     */
    void parseTrips(List<CSVRecord> records) {
        if (isMissing(records, "trip")) {
            return;
        }
        log.info("Parsing {} trip records", records.size());
        for (CSVRecord row : records) {
            builder.addTrip(row.get("trip_id"), row.get("route_id"), row.get("service_id"));
        }
    }

    /**
     * Adds one stop time per record; arrival and departure are parsed with {@link ServiceDayTime#parse}.
     *
     * @param records stop time records; {@code null} is skipped with a warning
     */
    void parseStopTimes(List<CSVRecord> records) {
        if (isMissing(records, "stop time")) {
            return;
        }
        log.info("Parsing {} stop time records", records.size());
        for (CSVRecord row : records) {
            builder.addStopTime(row.get("trip_id"), row.get("stop_id"), ServiceDayTime.parse(row.get("arrival_time")),
                    ServiceDayTime.parse(row.get("departure_time")));
        }
    }

    /** Parses a GTFS {@code yyyyMMdd} date string. */
    private static LocalDate parseDate(String value) {
        return LocalDate.parse(value, DATE_FORMATTER);
    }

    /**
     * Reports whether a record list is absent, logging a warning if so.
     *
     * @param records record list handed to a parse method, possibly {@code null}
     * @param kind    human-readable record kind used in the log message
     * @return {@code true} if {@code records} is {@code null} and the caller should skip parsing
     */
    private static boolean isMissing(List<CSVRecord> records, String kind) {
        if (records != null) {
            return false;
        }
        log.warn("No {} records available, skipping", kind);
        return true;
    }

}
91 changes: 35 additions & 56 deletions src/main/java/ch/naviqore/gtfs/schedule/GtfsScheduleReader.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,21 @@

import ch.naviqore.gtfs.schedule.model.GtfsSchedule;
import ch.naviqore.gtfs.schedule.model.GtfsScheduleBuilder;
import ch.naviqore.gtfs.schedule.type.ExceptionType;
import ch.naviqore.gtfs.schedule.type.RouteType;
import ch.naviqore.gtfs.schedule.type.ServiceDayTime;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;

import java.io.File;
import java.io.FileReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.time.DayOfWeek;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
Expand All @@ -47,21 +42,24 @@
public class GtfsScheduleReader {

private static final String ZIP_FILE_EXTENSION = ".zip";
private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd");
private static final Map<String, DayOfWeek> DAY_MAPPINGS = Map.of("monday", DayOfWeek.MONDAY, "tuesday",
DayOfWeek.TUESDAY, "wednesday", DayOfWeek.WEDNESDAY, "thursday", DayOfWeek.THURSDAY, "friday",
DayOfWeek.FRIDAY, "saturday", DayOfWeek.SATURDAY, "sunday", DayOfWeek.SUNDAY);

/**
* Standard GTFS file types and their corresponding file names.
*/
@RequiredArgsConstructor
@Getter
public enum GtfsFile {
AGENCY("agency.txt"), CALENDAR_DATES("calendar_dates.txt"), CALENDAR("calendar.txt"), FARE_ATTRIBUTES(
"fare_attributes.txt"), FARE_RULES("fare_rules.txt"), FREQUENCIES("frequencies.txt"), ROUTES(
"routes.txt"), SHAPES("shapes.txt"), STOP_TIMES("stop_times.txt"), STOPS("stops.txt"), TRIPS(
"trips.txt");
AGENCY("agency.txt"),
CALENDAR_DATES("calendar_dates.txt"),
CALENDAR("calendar.txt"),
FARE_ATTRIBUTES("fare_attributes.txt"),
FARE_RULES("fare_rules.txt"),
FREQUENCIES("frequencies.txt"),
ROUTES("routes.txt"),
SHAPES("shapes.txt"),
STOP_TIMES("stop_times.txt"),
STOPS("stops.txt"),
TRIPS("trips.txt");

private final String fileName;
}
Expand All @@ -85,41 +83,14 @@ public GtfsSchedule read(String path) throws IOException {

private GtfsSchedule buildSchedule(Map<GtfsFile, List<CSVRecord>> records) {
GtfsScheduleBuilder builder = GtfsScheduleBuilder.builder();
for (CSVRecord record : records.get(GtfsFile.AGENCY)) {
builder.addAgency(record.get("agency_id"), record.get("agency_name"), record.get("agency_url"),
record.get("agency_timezone"));
}
for (CSVRecord record : records.get(GtfsFile.STOPS)) {
builder.addStop(record.get("stop_id"), record.get("stop_name"), Double.parseDouble(record.get("stop_lat")),
Double.parseDouble(record.get("stop_lon")));
}
for (CSVRecord record : records.get(GtfsFile.ROUTES)) {
builder.addRoute(record.get("route_id"), record.get("agency_id"), record.get("route_short_name"),
record.get("route_long_name"), RouteType.parse(record.get("route_type")));
}
for (CSVRecord record : records.get(GtfsFile.CALENDAR)) {
EnumSet<DayOfWeek> serviceDays = EnumSet.noneOf(DayOfWeek.class);
DAY_MAPPINGS.forEach((key, value) -> {
if ("1".equals(record.get(key))) {
serviceDays.add(value);
}
});
builder.addCalendar(record.get("service_id"), serviceDays,
LocalDate.parse(record.get("start_date"), DATE_FORMATTER),
LocalDate.parse(record.get("end_date"), DATE_FORMATTER));
}
for (CSVRecord record : records.get(GtfsFile.CALENDAR_DATES)) {
builder.addCalendarDate(record.get("service_id"), LocalDate.parse(record.get("date"), DATE_FORMATTER),
ExceptionType.parse(record.get("exception_type")));
}
for (CSVRecord record : records.get(GtfsFile.TRIPS)) {
builder.addTrip(record.get("trip_id"), record.get("route_id"), record.get("service_id"));
}
for (CSVRecord record : records.get(GtfsFile.STOP_TIMES)) {
builder.addStopTime(record.get("trip_id"), record.get("stop_id"),
ServiceDayTime.parse(record.get("arrival_time")),
ServiceDayTime.parse(record.get("departure_time")));
}
GtfsScheduleParser parser = new GtfsScheduleParser(builder);
parser.parseAgencies(records.get(GtfsFile.AGENCY));
parser.parseCalendars(records.get(GtfsFile.CALENDAR));
parser.parseCalendarDates(records.get(GtfsFile.CALENDAR_DATES));
parser.parseStops(records.get(GtfsFile.STOPS));
parser.parseRoutes(records.get(GtfsFile.ROUTES));
parser.parseTrips(records.get(GtfsFile.TRIPS));
parser.parseStopTimes(records.get(GtfsFile.STOP_TIMES));
return builder.build();
}

Expand All @@ -129,7 +100,7 @@ private Map<GtfsFile, List<CSVRecord>> readFromDirectory(File directory) throws
for (GtfsFile fileType : GtfsFile.values()) {
File csvFile = new File(directory, fileType.getFileName());
if (csvFile.exists()) {
log.debug("Reading GTFS CSV file: {}", csvFile.getAbsolutePath());
log.info("Reading GTFS CSV file: {}", csvFile.getAbsolutePath());
records.put(fileType, readCsvFile(csvFile));
} else {
log.warn("GTFS CSV file {} not found", csvFile.getAbsolutePath());
Expand All @@ -146,9 +117,12 @@ private Map<GtfsFile, List<CSVRecord>> readFromZip(File zipFile) throws IOExcept
for (GtfsFile fileType : GtfsFile.values()) {
ZipEntry entry = zf.getEntry(fileType.getFileName());
if (entry != null) {
log.debug("Reading GTFS file from ZIP: {}", entry.getName());
try (InputStreamReader reader = new InputStreamReader(zf.getInputStream(entry),
StandardCharsets.UTF_8)) {
log.info("Reading GTFS file from ZIP: {}", entry.getName());
try (InputStreamReader reader = new InputStreamReader(BOMInputStream.builder()
.setInputStream(zf.getInputStream(entry))
.setByteOrderMarks(ByteOrderMark.UTF_8)
.setInclude(false)
.get(), StandardCharsets.UTF_8)) {
records.put(fileType, readCsv(reader));
}
} else {
Expand All @@ -161,14 +135,19 @@ private Map<GtfsFile, List<CSVRecord>> readFromZip(File zipFile) throws IOExcept
}

private List<CSVRecord> readCsvFile(File file) throws IOException {
try (FileReader reader = new FileReader(file)) {
try (FileInputStream fileInputStream = new FileInputStream(file);
BOMInputStream bomInputStream = BOMInputStream.builder()
.setInputStream(fileInputStream)
.setByteOrderMarks(ByteOrderMark.UTF_8)
.get(); InputStreamReader reader = new InputStreamReader(bomInputStream, StandardCharsets.UTF_8)) {
return readCsv(reader);
}
}

/**
 * Parses all records of one CSV stream.
 * <p>
 * The format is built with an empty header definition, case-insensitive header lookup and value trimming. The
 * parser is closed before returning.
 *
 * @param reader character stream of the CSV content
 * @return all records read from the stream
 * @throws IOException if reading or parsing fails
 */
private List<CSVRecord> readCsv(InputStreamReader reader) throws IOException {
    CSVFormat.Builder formatBuilder = CSVFormat.DEFAULT.builder();
    formatBuilder.setHeader();
    formatBuilder.setIgnoreHeaderCase(true);
    formatBuilder.setTrim(true);
    CSVFormat csvFormat = formatBuilder.build();

    try (CSVParser csvParser = new CSVParser(reader, csvFormat)) {
        log.debug("CSV Headers: {}", csvParser.getHeaderMap().keySet());
        List<CSVRecord> rows = csvParser.getRecords();
        return rows;
    }
}
Expand Down
18 changes: 18 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/RunExample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package ch.naviqore.gtfs.schedule;

import ch.naviqore.gtfs.schedule.model.GtfsSchedule;
import ch.naviqore.gtfs.schedule.model.GtfsScheduleDay;

import java.io.IOException;
import java.time.LocalDate;

/**
 * Example entry point: reads a GTFS feed and resolves the schedule for the current day.
 * <p>
 * The feed location is taken from the first command line argument; without arguments the built-in default path is
 * used.
 */
public class RunExample {
    /** Default feed location, used when no path is passed on the command line. */
    private static final String GTFS_FILE = "/Users/munterfi/Downloads/gtfs_fp2024_2024-04-11_09-11.zip";

    /**
     * @param args optional; {@code args[0]} is the path of the GTFS feed to read
     * @throws IOException          if the feed cannot be read
     * @throws InterruptedException if the final sleep is interrupted
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        String gtfsPath = args.length > 0 ? args[0] : GTFS_FILE;
        GtfsSchedule schedule = new GtfsScheduleReader().read(gtfsPath);
        GtfsScheduleDay scheduleDay = schedule.getScheduleForDay(LocalDate.now());
        // NOTE(review): the GC request and 30 s pause presumably exist so the retained heap (schedule, scheduleDay)
        // can be inspected with an external profiler before the JVM exits - confirm before removing.
        System.gc();
        Thread.sleep(30000);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/**
* General Transit Feed Specification (GTFS) schedule builder
Expand All @@ -30,6 +31,23 @@
@Log4j2
public class GtfsScheduleBuilder {

/**
 * Cache for value objects.
 * <p>
 * Hands out one canonical instance per distinct value: the first instance seen is stored, and every later request
 * with an equal value returns that stored instance.
 */
static class Cache {
    private final Map<LocalDate, LocalDate> localDates = new ConcurrentHashMap<>();
    private final Map<ServiceDayTime, ServiceDayTime> serviceDayTimes = new ConcurrentHashMap<>();

    /**
     * @param value date to look up
     * @return the cached instance equal to {@code value}, or {@code value} itself if it is the first of its kind
     */
    public LocalDate getOrAdd(LocalDate value) {
        return canonical(localDates, value);
    }

    /**
     * @param value service day time to look up
     * @return the cached instance equal to {@code value}, or {@code value} itself if it is the first of its kind
     */
    public ServiceDayTime getOrAdd(ServiceDayTime value) {
        return canonical(serviceDayTimes, value);
    }

    /** Returns the pooled instance equal to {@code candidate}, registering {@code candidate} if none exists yet. */
    private static <T> T canonical(Map<T, T> pool, T candidate) {
        return pool.computeIfAbsent(candidate, key -> candidate);
    }
}

private final Cache cache = new Cache();
private final Map<String, Agency> agencies = new HashMap<>();
private final Map<String, Calendar> calendars = new HashMap<>();
private final Map<String, Stop> stops = new HashMap<>();
Expand Down Expand Up @@ -71,12 +89,13 @@ public GtfsScheduleBuilder addRoute(String id, String agencyId, String shortName
return this;
}

public GtfsScheduleBuilder addCalendar(String id, EnumSet<DayOfWeek> serviceDays, LocalDate startDate, LocalDate endDate) {
public GtfsScheduleBuilder addCalendar(String id, EnumSet<DayOfWeek> serviceDays, LocalDate startDate,
LocalDate endDate) {
if (calendars.containsKey(id)) {
throw new IllegalArgumentException("Calendar " + id + " already exists");
}
log.debug("Adding calendar {}", id);
calendars.put(id, new Calendar(id, serviceDays, startDate, endDate));
calendars.put(id, new Calendar(id, serviceDays, cache.getOrAdd(startDate), cache.getOrAdd(endDate)));
return this;
}

Expand All @@ -86,7 +105,7 @@ public GtfsScheduleBuilder addCalendarDate(String calendarId, LocalDate date, Ex
throw new IllegalArgumentException("Calendar " + calendarId + " does not exist");
}
log.debug("Adding calendar {}-{}", calendarId, date);
CalendarDate calendarDate = new CalendarDate(calendar, date, type);
CalendarDate calendarDate = new CalendarDate(calendar, cache.getOrAdd(date), type);
calendar.addCalendarDate(calendarDate);
return this;
}
Expand All @@ -110,7 +129,8 @@ public GtfsScheduleBuilder addTrip(String id, String routeId, String serviceId)
return this;
}

public GtfsScheduleBuilder addStopTime(String tripId, String stopId, ServiceDayTime arrival, ServiceDayTime departure) {
public GtfsScheduleBuilder addStopTime(String tripId, String stopId, ServiceDayTime arrival,
ServiceDayTime departure) {
Trip trip = trips.get(tripId);
if (trip == null) {
throw new IllegalArgumentException("Trip " + tripId + " does not exist");
Expand All @@ -120,7 +140,7 @@ public GtfsScheduleBuilder addStopTime(String tripId, String stopId, ServiceDayT
throw new IllegalArgumentException("Stop " + stopId + " does not exist");
}
log.debug("Adding stop {} to trip {} ({}-{})", stopId, tripId, arrival, departure);
StopTime stopTime = new StopTime(stop, trip, arrival, departure);
StopTime stopTime = new StopTime(stop, trip, cache.getOrAdd(arrival), cache.getOrAdd(departure));
stop.addStopTime(stopTime);
trip.addStopTime(stopTime);
return this;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import lombok.EqualsAndHashCode;
import lombok.Getter;


/**
* Service day time
* <p>
Expand All @@ -14,7 +13,7 @@
*/
@EqualsAndHashCode
@Getter
public class ServiceDayTime implements Comparable<ServiceDayTime> {
public final class ServiceDayTime implements Comparable<ServiceDayTime> {
private final int totalSeconds;

public ServiceDayTime(int hours, int minutes, int seconds) {
Expand Down

0 comments on commit f0cc196

Please sign in to comment.