Skip to content

Commit

Permalink
Merge pull request #1 from naviqore/feature/gtfs-schedule-parser
Browse files Browse the repository at this point in the history
Feature/gtfs schedule parser
  • Loading branch information
Brunner246 authored Apr 15, 2024
2 parents 769bf03 + f0cc196 commit 4711970
Show file tree
Hide file tree
Showing 19 changed files with 789 additions and 26 deletions.
18 changes: 9 additions & 9 deletions .github/workflows/maven-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,15 @@ jobs:
runs-on: ubuntu-latest

steps:
- uses: actions/checkout@v3
- name: Set up JDK 21
uses: actions/setup-java@v3
with:
java-version: '21'
distribution: 'temurin'
cache: maven
- name: Build with Maven
run: mvn -B package --file pom.xml
- uses: actions/checkout@v3
- name: Set up JDK 21
uses: actions/setup-java@v3
with:
java-version: '21'
distribution: 'temurin'
cache: maven
- name: Build with Maven
run: mvn -B verify --file pom.xml

# Optional: Uploads the full dependency graph to GitHub to improve the quality of Dependabot alerts this repository can receive
# - name: Update dependency graph
Expand Down
98 changes: 98 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/GtfsScheduleParser.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package ch.naviqore.gtfs.schedule;

import ch.naviqore.gtfs.schedule.model.GtfsScheduleBuilder;
import ch.naviqore.gtfs.schedule.type.ExceptionType;
import ch.naviqore.gtfs.schedule.type.RouteType;
import ch.naviqore.gtfs.schedule.type.ServiceDayTime;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.csv.CSVRecord;

import java.time.DayOfWeek;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;

/**
 * Forwards the columns of raw GTFS CSV records to a {@link GtfsScheduleBuilder}.
 * <p>
 * There is one {@code parse...} method per GTFS file type. Each one logs the number of records it received and then
 * hands the relevant columns of every record to the builder, in the order the records appear in the list.
 *
 * @author munterfi
 */
@RequiredArgsConstructor
@Log4j2
class GtfsScheduleParser {

    /** Formatter for the {@code yyyyMMdd} date columns. */
    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd");

    /** Maps the weekday column names of a calendar record to the corresponding {@link DayOfWeek}. */
    private static final Map<String, DayOfWeek> DAY_MAPPINGS = Map.of(
            "monday", DayOfWeek.MONDAY,
            "tuesday", DayOfWeek.TUESDAY,
            "wednesday", DayOfWeek.WEDNESDAY,
            "thursday", DayOfWeek.THURSDAY,
            "friday", DayOfWeek.FRIDAY,
            "saturday", DayOfWeek.SATURDAY,
            "sunday", DayOfWeek.SUNDAY);

    /** Receiver of all parsed entities; supplied through the Lombok-generated constructor. */
    private final GtfsScheduleBuilder builder;

    /**
     * Converts a {@code yyyyMMdd} column value into a {@link LocalDate}.
     */
    private static LocalDate toDate(String value) {
        return LocalDate.parse(value, DATE_FORMATTER);
    }

    /**
     * Collects every weekday whose column in the given calendar record holds the value {@code "1"}.
     */
    private static EnumSet<DayOfWeek> serviceDaysOf(CSVRecord row) {
        EnumSet<DayOfWeek> days = EnumSet.noneOf(DayOfWeek.class);
        for (Map.Entry<String, DayOfWeek> mapping : DAY_MAPPINGS.entrySet()) {
            if ("1".equals(row.get(mapping.getKey()))) {
                days.add(mapping.getValue());
            }
        }
        return days;
    }

    /**
     * Adds one agency to the builder per record.
     *
     * @param records agency records
     */
    void parseAgencies(List<CSVRecord> records) {
        log.info("Parsing {} agency records", records.size());
        records.forEach(row -> builder.addAgency(row.get("agency_id"), row.get("agency_name"),
                row.get("agency_url"), row.get("agency_timezone")));
    }

    /**
     * Adds one calendar to the builder per record, made of its service id, active weekdays and validity dates.
     *
     * @param records calendar records
     */
    void parseCalendars(List<CSVRecord> records) {
        log.info("Parsing {} calendar records", records.size());
        for (CSVRecord row : records) {
            EnumSet<DayOfWeek> serviceDays = serviceDaysOf(row);
            String serviceId = row.get("service_id");
            LocalDate startDate = toDate(row.get("start_date"));
            LocalDate endDate = toDate(row.get("end_date"));
            builder.addCalendar(serviceId, serviceDays, startDate, endDate);
        }
    }

    /**
     * Adds one calendar date (service id, date and exception type) to the builder per record.
     *
     * @param records calendar date records
     */
    void parseCalendarDates(List<CSVRecord> records) {
        log.info("Parsing {} calendar date records", records.size());
        for (CSVRecord row : records) {
            String serviceId = row.get("service_id");
            LocalDate date = toDate(row.get("date"));
            ExceptionType exceptionType = ExceptionType.parse(row.get("exception_type"));
            builder.addCalendarDate(serviceId, date, exceptionType);
        }
    }

    /**
     * Adds one stop to the builder per record; latitude and longitude are parsed as doubles.
     *
     * @param records stop records
     */
    void parseStops(List<CSVRecord> records) {
        log.info("Parsing {} stop records", records.size());
        for (CSVRecord row : records) {
            String stopId = row.get("stop_id");
            String stopName = row.get("stop_name");
            double latitude = Double.parseDouble(row.get("stop_lat"));
            double longitude = Double.parseDouble(row.get("stop_lon"));
            builder.addStop(stopId, stopName, latitude, longitude);
        }
    }

    /**
     * Adds one route to the builder per record.
     * <p>
     * The {@code route_type} column is not read yet; every route is registered as {@link RouteType#RAIL}.
     *
     * @param records route records
     */
    void parseRoutes(List<CSVRecord> records) {
        log.info("Parsing {} route records", records.size());
        // TODO: Route types are not standardized in any way.
        //  RouteType.parse(record.get("route_type"))
        records.forEach(row -> builder.addRoute(row.get("route_id"), row.get("agency_id"),
                row.get("route_short_name"), row.get("route_long_name"), RouteType.RAIL));
    }

    /**
     * Adds one trip (trip id, route id and service id) to the builder per record.
     *
     * @param records trip records
     */
    void parseTrips(List<CSVRecord> records) {
        log.info("Parsing {} trip records", records.size());
        records.forEach(row -> builder.addTrip(row.get("trip_id"), row.get("route_id"), row.get("service_id")));
    }

    /**
     * Adds one stop time to the builder per record; arrival and departure are parsed as {@link ServiceDayTime}.
     *
     * @param records stop time records
     */
    void parseStopTimes(List<CSVRecord> records) {
        log.info("Parsing {} stop time records", records.size());
        for (CSVRecord row : records) {
            String tripId = row.get("trip_id");
            String stopId = row.get("stop_id");
            ServiceDayTime arrival = ServiceDayTime.parse(row.get("arrival_time"));
            ServiceDayTime departure = ServiceDayTime.parse(row.get("departure_time"));
            builder.addStopTime(tripId, stopId, arrival, departure);
        }
    }

}
43 changes: 35 additions & 8 deletions src/main/java/ch/naviqore/gtfs/schedule/GtfsScheduleReader.java
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
package ch.naviqore.gtfs.schedule;

import ch.naviqore.gtfs.schedule.model.GtfsSchedule;
import ch.naviqore.gtfs.schedule.model.GtfsScheduleBuilder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.RequiredArgsConstructor;
import lombok.extern.log4j.Log4j2;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.ByteOrderMark;
import org.apache.commons.io.input.BOMInputStream;

import java.io.File;
import java.io.FileReader;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
Expand Down Expand Up @@ -60,9 +64,9 @@ public enum GtfsFile {
private final String fileName;
}

public Map<GtfsFile, List<CSVRecord>> read(String path) throws IOException {
public GtfsSchedule read(String path) throws IOException {
File file = new File(path);
Map<GtfsFile, List<CSVRecord>> records = new HashMap<>();
Map<GtfsFile, List<CSVRecord>> records;

if (file.isDirectory()) {
log.info("Reading GTFS CSV files from directory: {}", path);
Expand All @@ -73,7 +77,21 @@ public Map<GtfsFile, List<CSVRecord>> read(String path) throws IOException {
} else {
throw new IllegalArgumentException("Path must be a directory or a .zip file");
}
return records;

return buildSchedule(records);
}

/**
 * Builds a {@link GtfsSchedule} by passing the records of every GTFS file type through a
 * {@link GtfsScheduleParser}.
 * <p>
 * The parse order is kept as is (agencies, calendars, calendar dates, stops, routes, trips, stop times).
 * NOTE(review): the builder presumably resolves references to earlier entities, so the order likely matters -
 * confirm against GtfsScheduleBuilder.
 * <p>
 * A file type that was not found while reading has no entry in {@code records}. It is treated as an empty file
 * here instead of handing {@code null} to the parser, which would fail with a {@link NullPointerException}.
 *
 * @param records raw CSV records keyed by GTFS file type; entries may be missing
 * @return the schedule produced by the builder
 */
private GtfsSchedule buildSchedule(Map<GtfsFile, List<CSVRecord>> records) {
    GtfsScheduleBuilder builder = GtfsScheduleBuilder.builder();
    GtfsScheduleParser parser = new GtfsScheduleParser(builder);
    parser.parseAgencies(records.getOrDefault(GtfsFile.AGENCY, List.of()));
    parser.parseCalendars(records.getOrDefault(GtfsFile.CALENDAR, List.of()));
    parser.parseCalendarDates(records.getOrDefault(GtfsFile.CALENDAR_DATES, List.of()));
    parser.parseStops(records.getOrDefault(GtfsFile.STOPS, List.of()));
    parser.parseRoutes(records.getOrDefault(GtfsFile.ROUTES, List.of()));
    parser.parseTrips(records.getOrDefault(GtfsFile.TRIPS, List.of()));
    parser.parseStopTimes(records.getOrDefault(GtfsFile.STOP_TIMES, List.of()));
    return builder.build();
}

private Map<GtfsFile, List<CSVRecord>> readFromDirectory(File directory) throws IOException {
Expand All @@ -82,7 +100,7 @@ private Map<GtfsFile, List<CSVRecord>> readFromDirectory(File directory) throws
for (GtfsFile fileType : GtfsFile.values()) {
File csvFile = new File(directory, fileType.getFileName());
if (csvFile.exists()) {
log.debug("Reading GTFS CSV file: {}", csvFile.getAbsolutePath());
log.info("Reading GTFS CSV file: {}", csvFile.getAbsolutePath());
records.put(fileType, readCsvFile(csvFile));
} else {
log.warn("GTFS CSV file {} not found", csvFile.getAbsolutePath());
Expand All @@ -99,8 +117,12 @@ private Map<GtfsFile, List<CSVRecord>> readFromZip(File zipFile) throws IOExcept
for (GtfsFile fileType : GtfsFile.values()) {
ZipEntry entry = zf.getEntry(fileType.getFileName());
if (entry != null) {
log.debug("Reading GTFS file from ZIP: {}", entry.getName());
try (InputStreamReader reader = new InputStreamReader(zf.getInputStream(entry), StandardCharsets.UTF_8)) {
log.info("Reading GTFS file from ZIP: {}", entry.getName());
try (InputStreamReader reader = new InputStreamReader(BOMInputStream.builder()
.setInputStream(zf.getInputStream(entry))
.setByteOrderMarks(ByteOrderMark.UTF_8)
.setInclude(false)
.get(), StandardCharsets.UTF_8)) {
records.put(fileType, readCsv(reader));
}
} else {
Expand All @@ -113,14 +135,19 @@ private Map<GtfsFile, List<CSVRecord>> readFromZip(File zipFile) throws IOExcept
}

private List<CSVRecord> readCsvFile(File file) throws IOException {
try (FileReader reader = new FileReader(file)) {
try (FileInputStream fileInputStream = new FileInputStream(file);
BOMInputStream bomInputStream = BOMInputStream.builder()
.setInputStream(fileInputStream)
.setByteOrderMarks(ByteOrderMark.UTF_8)
.get(); InputStreamReader reader = new InputStreamReader(bomInputStream, StandardCharsets.UTF_8)) {
return readCsv(reader);
}
}

/**
 * Parses every record from the given reader.
 * <p>
 * The format is the Commons CSV default with header parsing enabled, case-insensitive header lookup and
 * trimming switched on. The detected header names are logged at debug level.
 *
 * @param reader source of the CSV text; it is wrapped by the parser, which is closed before returning
 * @return all records read from the reader
 * @throws IOException if reading or parsing fails
 */
private List<CSVRecord> readCsv(InputStreamReader reader) throws IOException {
    final CSVFormat csvFormat = CSVFormat.DEFAULT.builder()
            .setHeader()
            .setIgnoreHeaderCase(true)
            .setTrim(true)
            .build();
    try (CSVParser csvParser = new CSVParser(reader, csvFormat)) {
        final var headerNames = csvParser.getHeaderMap().keySet();
        log.debug("CSV Headers: {}", headerNames);
        final List<CSVRecord> rows = csvParser.getRecords();
        return rows;
    }
}
Expand Down
18 changes: 18 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/RunExample.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package ch.naviqore.gtfs.schedule;

import ch.naviqore.gtfs.schedule.model.GtfsSchedule;
import ch.naviqore.gtfs.schedule.model.GtfsScheduleDay;

import java.io.IOException;
import java.time.LocalDate;

/**
 * Manual example that loads a GTFS feed and requests the schedule for the current day.
 * <p>
 * Usage: {@code RunExample [path]}, where {@code path} is a GTFS directory or {@code .zip} file. When no
 * argument is given, the built-in {@link #GTFS_FILE} default is used.
 */
public class RunExample {
    /** Default feed location, used only when no path is passed on the command line. */
    private static final String GTFS_FILE = "/Users/munterfi/Downloads/gtfs_fp2024_2024-04-11_09-11.zip";

    /**
     * Reads the feed, derives the schedule for today and then keeps the JVM alive for 30 seconds.
     *
     * @param args optional; {@code args[0]} overrides the default feed path
     * @throws IOException          if the feed cannot be read
     * @throws InterruptedException if the final sleep is interrupted
     */
    public static void main(String[] args) throws IOException, InterruptedException {
        String path = args.length > 0 ? args[0] : GTFS_FILE;
        GtfsSchedule schedule = new GtfsScheduleReader().read(path);
        GtfsScheduleDay scheduleDay = schedule.getScheduleForDay(LocalDate.now());
        // NOTE(review): the explicit GC and sleep presumably leave time to inspect heap usage of the loaded
        // schedule with an external tool - confirm before removing.
        System.gc();
        Thread.sleep(30000);
    }
}
4 changes: 4 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/model/Agency.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
package ch.naviqore.gtfs.schedule.model;

/**
 * Immutable value holding the attributes of one agency.
 * <p>
 * NOTE(review): the components presumably correspond to the GTFS columns {@code agency_id}, {@code agency_name},
 * {@code agency_url} and {@code agency_timezone}, in that order - confirm against GtfsScheduleBuilder.
 *
 * @param agency   agency identifier (presumably {@code agency_id} - TODO confirm)
 * @param name     agency name
 * @param url      agency URL, kept as an unparsed string
 * @param timezone agency timezone, kept as an unparsed string
 */
public record Agency(String agency, String name, String url, String timezone) {
}
64 changes: 64 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/model/Calendar.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
package ch.naviqore.gtfs.schedule.model;

import ch.naviqore.gtfs.schedule.type.ExceptionType;
import lombok.AccessLevel;
import lombok.Getter;
import lombok.RequiredArgsConstructor;

import java.time.DayOfWeek;
import java.time.LocalDate;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Objects;

/**
 * Service calendar: a weekly service pattern that is valid between a start and an end date (both inclusive),
 * plus date-specific exceptions that add or remove service on individual days.
 * <p>
 * Two calendars are equal when their ids are equal.
 */
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
@Getter
public final class Calendar {
    private final String id;
    private final EnumSet<DayOfWeek> serviceDays;
    private final LocalDate startDate;
    private final LocalDate endDate;
    /** Exceptions to the weekly pattern, keyed by the date they apply to. */
    private final Map<LocalDate, CalendarDate> calendarDates = new HashMap<>();

    /**
     * Determines if the service is operational on a specific day, considering both regular service days and
     * exceptions.
     * <p>
     * An exception registered for the date always wins: the service runs if and only if the exception type is
     * {@link ExceptionType#ADDED}. This also holds for dates outside the start/end range, so a service explicitly
     * added on such a date is reported as available. Without an exception, the date must lie within the range
     * and its weekday must be one of the regular service days.
     *
     * @param date the date to check for service availability
     * @return true if the service is operational on the given date, false otherwise
     */
    public boolean isServiceAvailable(LocalDate date) {
        // Check exceptions first; a range check up front would silently drop service added outside the range.
        CalendarDate exception = calendarDates.get(date);
        if (exception != null) {
            return exception.type() == ExceptionType.ADDED;
        }
        if (date.isBefore(startDate) || date.isAfter(endDate)) {
            return false;
        }
        return serviceDays.contains(date.getDayOfWeek());
    }

    /**
     * Registers an exception for its date; a previously registered exception for the same date is replaced.
     *
     * @param calendarDate the exception to register
     */
    void addCalendarDate(CalendarDate calendarDate) {
        calendarDates.put(calendarDate.date(), calendarDate);
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj == null || obj.getClass() != this.getClass()) {
            return false;
        }
        var that = (Calendar) obj;
        return Objects.equals(this.id, that.id);
    }

    @Override
    public int hashCode() {
        return Objects.hash(id);
    }

    @Override
    public String toString() {
        return "Calendar[" + "id=" + id + ", " + "serviceDays=" + serviceDays + ", " + "startDate=" + startDate + ", " + "endDate=" + endDate + ']';
    }

}
12 changes: 12 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/model/CalendarDate.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
package ch.naviqore.gtfs.schedule.model;

import ch.naviqore.gtfs.schedule.type.ExceptionType;

import java.time.LocalDate;

/**
 * Exception to a {@link Calendar} on one specific date.
 * <p>
 * Ordering is by {@code date} only, so two instances with the same date compare as 0 even when their calendar
 * or type differ (unlike the record's generated {@code equals}).
 *
 * @param calendar the calendar this exception belongs to
 * @param date     the date the exception applies to
 * @param type     the kind of exception
 */
public record CalendarDate(Calendar calendar, LocalDate date, ExceptionType type) implements Comparable<CalendarDate> {

    /**
     * Orders calendar dates chronologically by their date.
     */
    @Override
    public int compareTo(CalendarDate other) {
        LocalDate otherDate = other.date();
        return date.compareTo(otherDate);
    }
}
55 changes: 55 additions & 0 deletions src/main/java/ch/naviqore/gtfs/schedule/model/GtfsSchedule.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
package ch.naviqore.gtfs.schedule.model;

import lombok.AccessLevel;
import lombok.RequiredArgsConstructor;

import java.time.LocalDate;
import java.util.Collections;
import java.util.Map;
import java.util.stream.Collectors;

/**
 * Container for the entities of a GTFS schedule, each kept in a map keyed by a string.
 * <p>
 * The accessors return unmodifiable views of the underlying maps.
 */
@RequiredArgsConstructor(access = AccessLevel.PACKAGE)
public class GtfsSchedule {

    private final Map<String, Agency> agencies;
    private final Map<String, Calendar> calendars;
    private final Map<String, Stop> stops;
    private final Map<String, Route> routes;
    private final Map<String, Trip> trips;

    /**
     * Tells whether the calendar of the given trip offers service on the given date.
     */
    private static boolean runsOn(Trip trip, LocalDate date) {
        return trip.getCalendar().isServiceAvailable(date);
    }

    /**
     * Retrieves a snapshot of the GTFS schedule active on a specific date.
     * <p>
     * Not implemented yet: the active trips are determined, but the result is discarded and {@code null} is
     * returned.
     *
     * @param date the date for which the active schedule is requested.
     * @return currently always {@code null}; intended to be a GtfsScheduleDay containing only the active routes,
     * stops, and trips for the specified date.
     */
    public GtfsScheduleDay getScheduleForDay(LocalDate date) {
        Map<String, Trip> activeTrips = trips.entrySet()
                .stream()
                .filter(tripEntry -> runsOn(tripEntry.getValue(), date))
                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));

        // TODO: Implement efficiently without copying.
        //  return new GtfsScheduleDay(date, activeStops, activeRoutes, activeTrips);
        return null;
    }

    /**
     * @return unmodifiable view of the agencies
     */
    public Map<String, Agency> getAgencies() {
        return Collections.unmodifiableMap(agencies);
    }

    /**
     * @return unmodifiable view of the calendars
     */
    public Map<String, Calendar> getCalendars() {
        return Collections.unmodifiableMap(calendars);
    }

    /**
     * @return unmodifiable view of the stops
     */
    public Map<String, Stop> getStops() {
        return Collections.unmodifiableMap(stops);
    }

    /**
     * @return unmodifiable view of the routes
     */
    public Map<String, Route> getRoutes() {
        return Collections.unmodifiableMap(routes);
    }

    /**
     * @return unmodifiable view of the trips
     */
    public Map<String, Trip> getTrips() {
        return Collections.unmodifiableMap(trips);
    }
}
Loading

0 comments on commit 4711970

Please sign in to comment.