From ae40dddafc93724f5f45a7214cc15eb0137a016b Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Wed, 10 May 2023 14:59:00 -0400
Subject: [PATCH 01/19] feat(transformations): add preserve fields transformation

---
 .../models/transform/FeedTransformation.java  |   9 +-
 .../PreserveCustomFieldsTransformation.java   | 184 ++++++++++++++++++
 2 files changed, 189 insertions(+), 4 deletions(-)
 create mode 100644 src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java

diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/FeedTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/FeedTransformation.java
index 0433fa457..221b26c67 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/FeedTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/FeedTransformation.java
@@ -27,10 +27,11 @@
 @JsonIgnoreProperties(ignoreUnknown = true)
 @JsonTypeInfo(use = JsonTypeInfo.Id.NAME)
 @JsonSubTypes({
-    @JsonSubTypes.Type(value = DeleteRecordsTransformation.class, name = "DeleteRecordsTransformation"),
-    @JsonSubTypes.Type(value = NormalizeFieldTransformation.class, name = "NormalizeFieldTransformation"),
-    @JsonSubTypes.Type(value = ReplaceFileFromVersionTransformation.class, name = "ReplaceFileFromVersionTransformation"),
-    @JsonSubTypes.Type(value = ReplaceFileFromStringTransformation.class, name = "ReplaceFileFromStringTransformation")
+    @JsonSubTypes.Type(value = DeleteRecordsTransformation.class, name = "DeleteRecordsTransformation"),
+    @JsonSubTypes.Type(value = NormalizeFieldTransformation.class, name = "NormalizeFieldTransformation"),
+    @JsonSubTypes.Type(value = ReplaceFileFromVersionTransformation.class, name = "ReplaceFileFromVersionTransformation"),
+    @JsonSubTypes.Type(value = ReplaceFileFromStringTransformation.class, name = "ReplaceFileFromStringTransformation"),
+    @JsonSubTypes.Type(value = PreserveCustomFieldsTransformation.class, name = "PreserveCustomFieldsTransformation")
 })
 public abstract class FeedTransformation implements Serializable {
     private static final long serialVersionUID = 1L;

diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java
new file mode 100644
index 000000000..439837718
--- /dev/null
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java
@@ -0,0 +1,184 @@
+package com.conveyal.datatools.manager.models.transform;
+
+import com.conveyal.datatools.common.status.MonitorableJob;
+import com.conveyal.datatools.manager.models.TableTransformResult;
+import com.conveyal.datatools.manager.models.TransformType;
+import com.opencsv.CSVReader;
+import com.opencsv.CSVWriter;
+import com.opencsv.exceptions.CsvValidationException;
+import org.apache.commons.io.input.BOMInputStream;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import com.conveyal.gtfs.loader.Table;
+
+import java.io.*;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.*;
+import java.util.*;
+import java.util.stream.Collector;
+import java.util.stream.Collectors;
+import org.apache.commons.io.IOUtils;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.text.WordUtils;
+
+/**
+ * This feed transformation will attempt to preserve any custom fields from an entered csv in the final GTFS output.
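+ * Rows of the supplied CSV are matched to rows of the target GTFS table on the table's primary key(s)
+ * (the key values are joined with "_" to form a lookup hash), and any CSV columns that are not part of
+ * the GTFS spec for that table are appended to the matching rows of the output file.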
+ */
+public class PreserveCustomFieldsTransformation extends ZipTransformation {
+    private static final Logger LOG = LoggerFactory.getLogger(PreserveCustomFieldsTransformation.class);
+    private static List<String> tablePrimaryKeys = new ArrayList<>();
+    private static Table specTable;
+
+    /** no-arg constructor for de/serialization */
+    public PreserveCustomFieldsTransformation() {}
+
+    public static PreserveCustomFieldsTransformation create(String sourceVersionId, String table) {
+        PreserveCustomFieldsTransformation transformation = new PreserveCustomFieldsTransformation();
+        transformation.sourceVersionId = sourceVersionId;
+        transformation.table = table;
+        return transformation;
+    }
+
+    @Override
+    public void validateParameters(MonitorableJob.Status status) {
+        if (csvData == null) {
+            status.fail("CSV data must not be null (delete table not yet supported)");
+        }
+    }
+
+    public static <T> Collector<T, ?, T> toSingleton() {
+        return Collectors.collectingAndThen(
+            Collectors.toList(),
+            list -> {
+                if (list.size() != 1) {
+                    throw new IllegalStateException();
+                }
+                return list.get(0);
+            }
+        );
+    }
+
+    private static HashMap<String, String[]> createCsvHashMap(CSVReader reader, List<Integer> primaryKeys) throws CsvValidationException, IOException {
+        HashMap<String, String[]> lookup = new HashMap<>();
+
+        String[] nextLine;
+        while ((nextLine = reader.readNext()) != null) {
+            final String[] finalNextLine = nextLine;
+            List<String> customCsvKeyValues = primaryKeys.stream().map(column -> finalNextLine[column]).collect(Collectors.toList());
+
+            // Concatenate keys to make a lookup hash and add to the hash map
+            String hashKey = StringUtils.join(customCsvKeyValues, "_");
+            lookup.put(hashKey, finalNextLine);
+        }
+
+        return lookup;
+    }
+
+    private static void writeLine(CSVWriter writer, String[] row, List<String> customFields, Map<String, Integer> customHeaders, String[] customValues) {
+        // Add new custom fields to the editor csv rows
+        String[] newRow = Arrays.copyOf(row, row.length + customFields.size());
+        if (customValues != null) {
+            // Write the custom values, if we have a match
+            for (int columnDiff = 0; columnDiff < customFields.size(); columnDiff++) {
+                String customField = customFields.get(columnDiff);
+                int customFieldIndex = customHeaders.get(customField);
+                newRow[row.length + columnDiff] = customValues[customFieldIndex];
+            }
+        }
+        writer.writeNext(newRow);
+    }
+
+    private static Map<String, Integer> mapHeaderColumns(String[] headers) {
+        Map<String, Integer> headersMapping = new HashMap<>();
+        for (int i = 0; i < headers.length; i++) headersMapping.put(headers[i], i);
+        return headersMapping;
+    }
+
+    private static String getClassNameFromTable(String table) {
+        String underscoreRemoved = table.replace("_", " ");
+        String capitalized = WordUtils.capitalize(underscoreRemoved);
+        String oneWordName = capitalized.replace(" ", "");
+        if (oneWordName.substring(oneWordName.length() - 1).equals("s")) {
+            oneWordName = oneWordName.substring(0, oneWordName.length() - 1);
+        }
+        return "com.conveyal.model." + oneWordName + "DTO";
+    }
+
+    @Override
+    public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status status) {
+        String tableName = table + ".txt";
+        Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
+        // Try to dynamically load the class for CSVBean
+        // String csvDataClassName = getClassNameFromTable(table);
+        // Class csvDataClass = Class.forName(csvDataClassName);
+
+        // TODO: is there a better way to do this than using a Singleton collector?
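+        // One possible alternative (sketch only): drop the custom collector and use findFirst(),
+        // failing explicitly when no spec table matches; a later patch in this series refactors
+        // to this shape:
+        //   Optional<Table> streamResult = Arrays.stream(Table.tablesInOrder)
+        //       .filter(t -> t.name.equals(table))
+        //       .findFirst();
+        //   if (!streamResult.isPresent()) throw new Exception("could not find specTable for table " + table);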
+        specTable = Arrays.stream(Table.tablesInOrder)
+            .filter(t -> t.name.equals(table))
+            .collect(toSingleton());
+        tablePrimaryKeys = specTable.getPrimaryKeyNames();
+
+        try (FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null)) {
+            List<String> specTableFields = specTable.specFields().stream().map(f -> f.name).collect(Collectors.toList());
+            Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
+
+            // TODO: There must be a better way to do this.
+            InputStream is = Files.newInputStream(targetTxtFilePath);
+            final File tempFile = File.createTempFile(tableName + "-temp", ".txt");
+            File output = File.createTempFile(tableName + "-output-temp", ".txt");
+            FileOutputStream out = new FileOutputStream(tempFile);
+            IOUtils.copy(is, out);
+
+            FileInputStream fileInputStream = new FileInputStream(tempFile);
+            // BOMInputStream to avoid any Byte Order Marks at the start of files.
+            CSVReader editorFileReader = new CSVReader(new InputStreamReader(new BOMInputStream(fileInputStream), StandardCharsets.UTF_8));
+            CSVReader customFileReader = new CSVReader(new StringReader(csvData));
+
+            // Store the headers with their indices in CSV for later lookups
+            String[] customHeaders = customFileReader.readNext();
+            String[] editorHeaders = editorFileReader.readNext();
+            Map<String, Integer> customCsvHeaders = mapHeaderColumns(customHeaders);
+            Map<String, Integer> editorCsvHeaders = mapHeaderColumns(editorHeaders);
+
+            // Find the customFields in the input csv
+            List<String> customFields = Arrays.stream(customHeaders).filter(h -> !specTableFields.contains(h)).collect(Collectors.toList());
+            if (customFields.size() == 0) return;
+
+            // Find the key columns in the custom CSV
+            List<Integer> customCsvKeyColumns = tablePrimaryKeys.stream()
+                .map(customCsvHeaders::get)
+                .collect(Collectors.toList());
+
+            HashMap<String, String[]> lookup = createCsvHashMap(customFileReader, customCsvKeyColumns);
+            CSVWriter writer = new CSVWriter(new FileWriter(output));
+            writeLine(writer, editorHeaders, customFields, customCsvHeaders, customHeaders); // Write headers before starting lookups
+
+            String[] nextLine;
+            while ((nextLine = editorFileReader.readNext()) != null) {
+                String[] finalNextLine = nextLine; // TODO: there must be some way around this.
+                List<String> editorCsvPrimaryKeyValues = tablePrimaryKeys.stream()
+                    .map(key -> finalNextLine[editorCsvHeaders.get(key)])
+                    .collect(Collectors.toList()); // Map the keys to the values for the row
+
+                String hashKey = StringUtils.join(editorCsvPrimaryKeyValues, "_");
+                String[] customCsvLine = lookup.get(hashKey);
+                writeLine(writer, nextLine, customFields, customCsvHeaders, customCsvLine);
+            }
+            writer.close();
+
+            Files.copy(output.toPath(), targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
+            tempFile.deleteOnExit();
+            output.deleteOnExit();
+            zipTarget.feedTransformResult.tableTransformResults.add(new TableTransformResult(tableName, TransformType.TABLE_MODIFIED));
+        } catch (NoSuchFileException e) {
+            status.fail("Source version does not contain table: " + tableName, e);
+        } catch (IOException e) {
+            status.fail("An exception occurred when writing output with custom fields", e);
+        } catch (CsvValidationException ex) {
+            ex.printStackTrace();
+        } catch (Exception e) {
+            status.fail("Unknown error encountered while transforming zip file", e);
+        }
+    }
+}

From e831764dfb5cf55be2bd50b9eea99e44f9da5296 Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Wed, 10 May 2023 16:22:23 -0400
Subject: [PATCH 02/19] refactor(transformations): update gtfs-lib to include Table class changes.
---
 pom.xml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 632b0d7da..dd972bbe4 100644
--- a/pom.xml
+++ b/pom.xml
@@ -268,7 +268,7 @@
         <dependency>
             <groupId>com.github.conveyal</groupId>
             <artifactId>gtfs-lib</artifactId>
-            <version>3d79493</version>
+            <version>41a6503</version>
         </dependency>
@@ -414,6 +414,14 @@
             <artifactId>snakeyaml</artifactId>
             <version>1.26</version>
         </dependency>
+
+        <dependency>
+            <groupId>com.opencsv</groupId>
+            <artifactId>opencsv</artifactId>
+            <version>5.7.1</version>
+        </dependency>
+
-
-        <dependency>
-            <groupId>com.opencsv</groupId>
-            <artifactId>opencsv</artifactId>
-            <version>5.7.1</version>
-        </dependency>
-
-            <version>41a6503</version>
+            <version>a3e5707</version>

diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java
index bd97218c9..e247d2db3 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java
@@ -66,7 +66,9 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
             .filter(t -> t.name.equals(table))
             .findFirst();
 
-        if (!streamResult.isPresent()) {throw new Exception(String.format("could not find specTable for table %s", table));}
+        if (!streamResult.isPresent()) {
+            throw new Exception(String.format("could not find specTable for table %s", table));
+        }
         Table specTable = streamResult.get();
 
         try (FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null)) {

diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/StringTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/StringTransformation.java
index aa6731a3a..c845d4f37 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/StringTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/StringTransformation.java
@@ -48,7 +48,14 @@ public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status st
             int lineCount = (int) csvData.chars().filter(c -> c == NEW_LINE_CHARACTER_CODE).count();
             int addedCount = type == TransformType.TABLE_ADDED ? lineCount : 0;
             int updatedCount = type == TransformType.TABLE_MODIFIED ? lineCount : 0;
-            zipTarget.feedTransformResult.tableTransformResults.add(new TableTransformResult(tableName, type, 0, updatedCount, addedCount, 0));
+            zipTarget.feedTransformResult.tableTransformResults.add(new TableTransformResult(
+                tableName,
+                type,
+                0,
+                updatedCount,
+                addedCount,
+                0
+            ));
         } catch (Exception e) {
             status.fail("Unknown error encountered while transforming zip file", e);
         }

diff --git a/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java b/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java
index 541503322..3bb40f228 100644
--- a/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java
+++ b/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java
@@ -8,6 +8,7 @@
 import com.conveyal.datatools.manager.models.FeedVersion;
 import com.conveyal.datatools.manager.models.Project;
 import com.conveyal.datatools.manager.models.Snapshot;
+import com.conveyal.datatools.manager.models.TableTransformResult;
 import com.conveyal.datatools.manager.models.transform.AddCustomFileFromStringTransformation;
 import com.conveyal.datatools.manager.models.transform.DeleteRecordsTransformation;
 import com.conveyal.datatools.manager.models.transform.FeedTransformRules;
@@ -234,17 +235,15 @@ void canPreserveCustomFieldsInStops() throws IOException {
             feedSource,
             zipFolderFiles("fake-agency-with-only-calendar-dates")
         );
-        LOG.info("Checking assertions.");
-
+        TableTransformResult transformResult = targetVersion.feedTransformResult.tableTransformResults.get(0);
         assertEquals(
             2,
-            targetVersion.feedTransformResult.tableTransformResults.get(0).customColumnsAdded,
+            transformResult.customColumnsAdded,
             "stops.txt custom column count should equal input csv data # of custom columns"
         );
-
         assertEquals(
             2,
-            targetVersion.feedTransformResult.tableTransformResults.get(0).updatedCount,
+            transformResult.updatedCount,
             "stops.txt row count modified with custom content should equal input csv data # of custom columns"
         );
     }
@@ -260,8 +259,6 @@ void canAddCustomFile() throws IOException {
             feedSource,
             zipFolderFiles("fake-agency-with-only-calendar-dates")
         );
-
-        LOG.info("Checking assertions.");
         assertEquals(
             2,
             targetVersion.feedTransformResult.tableTransformResults.get(0).addedCount,

From bf52aae8c9233943c5216d43f360e73487090b74 Mon Sep 17 00:00:00 2001
From: "philip.cline"
Date: Fri, 7 Jul 2023 14:26:33 -0400
Subject: [PATCH 19/19] chore(deps): update GTFS-lib ref

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 33a52e498..628aa09fe 100644
--- a/pom.xml
+++ b/pom.xml
@@ -268,7 +268,7 @@
         <dependency>
             <groupId>com.github.conveyal</groupId>
             <artifactId>gtfs-lib</artifactId>
-            <version>a3e5707</version>
+            <version>f2ceb59</version>
         </dependency>
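
For reference, a minimal sketch (not part of the patch series) of how this transformation is exercised end to end, mirroring the canPreserveCustomFieldsInStops() test above. The surrounding test context (feedSource, sourceVersionId, the zipFolderFiles helper), the public csvData field, the FeedTransformRules constructor, the Persistence API, and the fixture stop_ids are assumptions based on that test class, not confirmed API:

    // Custom CSV: stop_id is the primary key used for row matching; custom_column1 and
    // custom_column2 are not in the GTFS stops spec, so the transformation should append
    // them to matching rows of the output stops.txt.
    String customCsv =
        "stop_id,custom_column1,custom_column2\n" +
        "4u6g,customValue1,customValue2\n" +   // stop_ids assumed to exist in the fixture feed
        "johv,customValue3,customValue4";
    PreserveCustomFieldsTransformation transformation =
        PreserveCustomFieldsTransformation.create(sourceVersionId, "stops");
    transformation.csvData = customCsv; // validateParameters() fails the job if this is null
    FeedTransformRules transformRules = new FeedTransformRules(transformation);
    feedSource.transformRules.add(transformRules);
    Persistence.feedSources.replace(feedSource.id, feedSource);
    // Creating a new FeedVersion now runs the transformation; per the assertions above, the
    // stops.txt result should report customColumnsAdded == 2 and updatedCount == 2.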