diff --git a/pom.xml b/pom.xml
index 15ec170f7..a265b5442 100644
--- a/pom.xml
+++ b/pom.xml
@@ -268,7 +268,7 @@
         <dependency>
             <groupId>com.github.conveyal</groupId>
             <artifactId>gtfs-lib</artifactId>
-            <version>a3e5707</version>
+            <version>f2ceb59</version>
         </dependency>
diff --git a/src/main/java/com/conveyal/datatools/manager/models/TableTransformResult.java b/src/main/java/com/conveyal/datatools/manager/models/TableTransformResult.java
index c8560e86e..0ff2b0622 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/TableTransformResult.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/TableTransformResult.java
@@ -11,6 +11,7 @@ public class TableTransformResult implements Serializable {
     public int deletedCount;
     public int updatedCount;
     public int addedCount;
+    public int customColumnsAdded;
     public TransformType transformType;
     public String tableName;

@@ -21,6 +22,22 @@ public TableTransformResult(String tableName, TransformType transformType) {
         this.transformType = transformType;
     }

+    public TableTransformResult(
+        String tableName,
+        TransformType transformType,
+        int deletedCount,
+        int updatedCount,
+        int addedCount,
+        int customColumnsAdded
+    ) {
+        this.tableName = tableName;
+        this.transformType = transformType;
+        this.deletedCount = deletedCount;
+        this.updatedCount = updatedCount;
+        this.addedCount = addedCount;
+        this.customColumnsAdded = customColumnsAdded;
+    }
+
     public TableTransformResult(String tableName, int deletedCount, int updatedCount, int addedCount) {
         this.tableName = tableName;
         this.transformType = TransformType.TABLE_MODIFIED;
@@ -28,4 +45,4 @@ public TableTransformResult(String tableName, int deletedCount, int updatedCount
         this.updatedCount = updatedCount;
         this.addedCount = addedCount;
     }
-}
+}
\ No newline at end of file
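For reference, the new six-argument constructor is used like this; a minimal sketch with illustrative values:

    // Report a stops.txt edit that updated 2 rows and preserved 2 custom columns.
    TableTransformResult result = new TableTransformResult(
        "stops.txt",                  // tableName
        TransformType.TABLE_MODIFIED, // transformType
        0,                            // deletedCount
        2,                            // updatedCount
        0,                            // addedCount
        2                             // customColumnsAdded
    );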
diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/AddCustomFileFromStringTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/AddCustomFileFromStringTransformation.java
new file mode 100644
index 000000000..6d336aa6b
--- /dev/null
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/AddCustomFileFromStringTransformation.java
@@ -0,0 +1,24 @@
+package com.conveyal.datatools.manager.models.transform;
+
+import com.conveyal.datatools.common.status.MonitorableJob;
+
+public class AddCustomFileFromStringTransformation extends StringTransformation {
+
+    // Additional create method required to ensure transformation type is AddCustomFile in tests.
+    // Otherwise, we'd use the StringTransformation#create which doesn't differentiate types and hence
+    // would fail table name tests.
+    public static AddCustomFileFromStringTransformation create(String csvData, String table) {
+        AddCustomFileFromStringTransformation transformation = new AddCustomFileFromStringTransformation();
+        transformation.csvData = csvData;
+        transformation.table = table;
+        return transformation;
+    }
+
+    @Override
+    public void validateTableName(MonitorableJob.Status status) {
+        if (table.contains(".txt")) {
+            status.fail("CSV Table name should not contain .txt");
+        }
+    }
+
+}
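A hedged usage sketch of the new transformation (csv content is illustrative, and the no-arg MonitorableJob.Status constructor is assumed):

    AddCustomFileFromStringTransformation transformation =
        AddCustomFileFromStringTransformation.create("custom_column1\ncustomValue1", "custom-file");
    MonitorableJob.Status status = new MonitorableJob.Status();
    // Passes: the table name is given without an extension.
    transformation.validateTableName(status);
    // Passing "custom-file.txt" instead would fail the status, since table
    // names must be given without ".txt".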
diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/FeedTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/FeedTransformation.java
index 0433fa457..e63001b37 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/FeedTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/FeedTransformation.java
@@ -30,7 +30,9 @@
     @JsonSubTypes.Type(value = DeleteRecordsTransformation.class, name = "DeleteRecordsTransformation"),
     @JsonSubTypes.Type(value = NormalizeFieldTransformation.class, name = "NormalizeFieldTransformation"),
     @JsonSubTypes.Type(value = ReplaceFileFromVersionTransformation.class, name = "ReplaceFileFromVersionTransformation"),
-    @JsonSubTypes.Type(value = ReplaceFileFromStringTransformation.class, name = "ReplaceFileFromStringTransformation")
+    @JsonSubTypes.Type(value = ReplaceFileFromStringTransformation.class, name = "ReplaceFileFromStringTransformation"),
+    @JsonSubTypes.Type(value = PreserveCustomFieldsTransformation.class, name = "PreserveCustomFieldsTransformation"),
+    @JsonSubTypes.Type(value = AddCustomFileFromStringTransformation.class, name = "AddCustomFileTransformation")
 })
 public abstract class FeedTransformation<Target extends FeedTransformTarget> implements Serializable {
     private static final long serialVersionUID = 1L;
@@ -68,6 +70,8 @@ public void doTransform(FeedTransformTarget target, MonitorableJob.Status status
             status.fail(
                 String.format("Transformation must be of type '%s'.", getTransformationTypeName())
             );
+        } catch (Exception e) {
+            status.fail(e.toString());
         }
     }

@@ -78,7 +82,7 @@ public void doTransform(FeedTransformTarget target, MonitorableJob.Status status
      * @param target The database-bound or ZIP-file-bound target the transformation will operate on.
      * @param status Used to report success or failure status and details.
      */
-    public abstract void transform(Target target, MonitorableJob.Status status);
+    public abstract void transform(Target target, MonitorableJob.Status status) throws Exception;

     /**
      * At the moment, used by DbTransformation to validate field names.
@@ -98,7 +102,6 @@ protected void validateTableName(MonitorableJob.Status status) {
         // Validate fields before running transform.
         if (GtfsUtils.getGtfsTable(table) == null) {
             status.fail("Table must be valid GTFS spec table name (without .txt).");
-            return;
         }
     }
 }
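Note that AddCustomFileFromStringTransformation is registered under the JSON name "AddCustomFileTransformation" (not the class name), so API payloads must select it with that string. A minimal Jackson sketch, assuming the @JsonTypeInfo property configured on this class is "@type" (that annotation sits outside this hunk):

    import com.fasterxml.jackson.databind.ObjectMapper;

    ObjectMapper mapper = new ObjectMapper();
    String json = "{\"@type\":\"AddCustomFileTransformation\","
        + "\"csvData\":\"custom_column1\\ncustomValue1\",\"table\":\"custom-file\"}";
    // Polymorphic deserialization resolves the concrete subclass from the registered name.
    FeedTransformation transformation = mapper.readValue(json, FeedTransformation.class);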
diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java
new file mode 100644
index 000000000..e247d2db3
--- /dev/null
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/PreserveCustomFieldsTransformation.java
@@ -0,0 +1,138 @@
+package com.conveyal.datatools.manager.models.transform;
+
+import com.conveyal.datatools.common.status.MonitorableJob;
+import com.conveyal.datatools.manager.models.TableTransformResult;
+import com.conveyal.datatools.manager.models.TransformType;
+import org.supercsv.io.CsvMapReader;
+import com.conveyal.gtfs.loader.Table;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.File;
+import java.io.FileWriter;
+import java.io.StringReader;
+import java.io.IOException;
+import java.nio.file.*;
+import java.util.*;
+import java.util.stream.Collectors;
+import org.apache.commons.lang3.StringUtils;
+import org.apache.commons.lang3.ArrayUtils;
+import org.supercsv.io.CsvMapWriter;
+import org.supercsv.prefs.CsvPreference;
+
+
+/**
+ * This feed transformation will attempt to preserve any custom fields from an entered csv in the final GTFS output.
+ */
+public class PreserveCustomFieldsTransformation extends ZipTransformation {
+    /** no-arg constructor for de/serialization */
+    public PreserveCustomFieldsTransformation() {}
+
+    public static PreserveCustomFieldsTransformation create(String csvData, String table) {
+        PreserveCustomFieldsTransformation transformation = new PreserveCustomFieldsTransformation();
+        transformation.csvData = csvData;
+        transformation.table = table;
+        return transformation;
+    }
+
+    @Override
+    public void validateParameters(MonitorableJob.Status status) {
+        if (csvData == null) {
+            status.fail("CSV data must not be null (delete table not yet supported)");
+        }
+    }
+
+    /**
+     * This method creates a hash map of the GTFS table keys to the custom CSV values for efficient lookup of
+     * custom values. The hash map key is made of the primary key values of the GTFS table (e.g. stop_id for
+     * stops) concatenated by an underscore. The hash map value is the row read by CsvMapReader (a mapping of
+     * column name to row value).
+     */
+    private static HashMap<String, Map<String, String>> createCsvHashMap(CsvMapReader reader, String[] headers, List<String> tablePrimaryKeys) throws IOException {
+        HashMap<String, Map<String, String>> lookup = new HashMap<>();
+        Map<String, String> nextLine;
+        while ((nextLine = reader.read(headers)) != null) {
+            List<String> customCsvKeyValues = tablePrimaryKeys.stream().map(nextLine::get).collect(Collectors.toList());
+            String hashKey = StringUtils.join(customCsvKeyValues, "_");
+            lookup.put(hashKey, nextLine);
+        }
+        return lookup;
+    }
+
+    @Override
+    public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status status) throws Exception {
+        String tableName = table + ".txt";
+        Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
+        Optional<Table> streamResult = Arrays.stream(Table.tablesInOrder)
+            .filter(t -> t.name.equals(table))
+            .findFirst();
+
+        if (!streamResult.isPresent()) {
+            throw new Exception(String.format("could not find specTable for table %s", table));
+        }
+        Table specTable = streamResult.get();
+
+        try (FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null)) {
+            List<String> specTableFields = specTable.specFields().stream().map(f -> f.name).collect(Collectors.toList());
+            List<String> tablePrimaryKeys = specTable.getPrimaryKeyNames();
+
+            Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
+
+            final File tempFile = File.createTempFile(tableName + "-temp", ".txt");
+            File output = File.createTempFile(tableName + "-output-temp", ".txt");
+            int rowsModified = 0;
+            List<String> customFields;
+
+            try (
+                InputStream is = Files.newInputStream(targetTxtFilePath);
+                CsvMapReader customFileReader = new CsvMapReader(new StringReader(csvData), CsvPreference.STANDARD_PREFERENCE);
+                CsvMapReader editorFileReader = new CsvMapReader(new InputStreamReader(is), CsvPreference.STANDARD_PREFERENCE);
+                CsvMapWriter writer = new CsvMapWriter(new FileWriter(output), CsvPreference.STANDARD_PREFERENCE);
+            ) {
+
+                String[] customHeaders = customFileReader.getHeader(true);
+                String[] editorHeaders = editorFileReader.getHeader(true);
+
+                customFields = Arrays.stream(customHeaders).filter(h -> !specTableFields.contains(h)).collect(Collectors.toList());
+                if (customFields.isEmpty()) return;
+                String[] fullHeaders = ArrayUtils.addAll(editorHeaders, customFields.toArray(new String[0]));
+
+                HashMap<String, Map<String, String>> customFieldsLookup = createCsvHashMap(customFileReader, customHeaders, tablePrimaryKeys);
+                writer.writeHeader(fullHeaders);
+
+                Map<String, String> row;
+                while ((row = editorFileReader.read(editorHeaders)) != null) {
+                    List<String> editorCsvPrimaryKeyValues = tablePrimaryKeys.stream()
+                        .map(row::get)
+                        .collect(Collectors.toList());
+
+                    String hashKey = StringUtils.join(editorCsvPrimaryKeyValues, "_");
+                    Map<String, String> customCsvValues = customFieldsLookup.get(hashKey);
+                    Map<String, String> finalRow = row;
+                    customFields.forEach(customField -> {
+                        String value = customCsvValues == null ? null : customCsvValues.get(customField);
+                        finalRow.put(customField, value);
+                    });
+                    if (customCsvValues != null) rowsModified++;
+                    writer.write(finalRow, fullHeaders);
+                }
+            }
+            Files.copy(output.toPath(), targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
+            tempFile.deleteOnExit();
+            output.deleteOnExit();
+            zipTarget.feedTransformResult.tableTransformResults.add(new TableTransformResult(
+                tableName,
+                TransformType.TABLE_MODIFIED,
+                0,
+                rowsModified,
+                0,
+                customFields.size()
+            ));
+        } catch (NoSuchFileException e) {
+            status.fail("Source version does not contain table: " + tableName, e);
+        } catch (IOException e) {
+            status.fail("An exception occurred when writing output with custom fields", e);
+        } catch (Exception e) {
+            status.fail("Unknown error encountered while transforming zip file", e);
+        }
+    }
+}
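To make the primary-key hashing concrete, a small worked sketch (the table and values are illustrative; stops would key on stop_id alone):

    // A hypothetical composite-key table keyed on (from_stop_id, to_stop_id).
    List<String> tablePrimaryKeys = Arrays.asList("from_stop_id", "to_stop_id");
    Map<String, String> row = new HashMap<>();
    row.put("from_stop_id", "A");
    row.put("to_stop_id", "B");
    List<String> keyValues = tablePrimaryKeys.stream().map(row::get).collect(Collectors.toList());
    String hashKey = StringUtils.join(keyValues, "_"); // "A_B"
    // An editor row producing the same "A_B" key receives that row's custom column values.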
diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java
index 2ac87e569..823dbc5f5 100644
--- a/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/ReplaceFileFromStringTransformation.java
@@ -1,60 +1,9 @@
 package com.conveyal.datatools.manager.models.transform;

 import com.conveyal.datatools.common.status.MonitorableJob;
-import com.conveyal.datatools.manager.models.TableTransformResult;
-import com.conveyal.datatools.manager.models.TransformType;
-
-import java.io.ByteArrayInputStream;
-import java.io.InputStream;
-import java.nio.charset.StandardCharsets;
-import java.nio.file.FileSystem;
-import java.nio.file.FileSystems;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.nio.file.Paths;
-import java.nio.file.StandardCopyOption;

 /**
  * This feed transformation will replace a file in the target zip (table) with the provided csv data.
  */
-public class ReplaceFileFromStringTransformation extends ZipTransformation {
-
-    public static ReplaceFileFromStringTransformation create(String csvData, String table) {
-        ReplaceFileFromStringTransformation transformation = new ReplaceFileFromStringTransformation();
-        transformation.csvData = csvData;
-        transformation.table = table;
-        return transformation;
-    }
-
-    @Override
-    public void validateParameters(MonitorableJob.Status status) {
-        if (csvData == null) {
-            status.fail("CSV data must not be null (delete table not yet supported)");
-        }
-    }
-
-    @Override
-    public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status status) {
-        // if (csvData == null) {
-        //     TODO: If this is a null value, delete the table (not yet supported).
-        // }
-
-        String tableName = table + ".txt";
-        // Run the replace transformation
-        Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
-        try( FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null) ){
-            // Convert csv data to input stream.
-            InputStream inputStream = new ByteArrayInputStream(csvData.getBytes(StandardCharsets.UTF_8));
-            Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
-            // Set transform type according to whether target file exists.
-            TransformType type = Files.exists(targetTxtFilePath)
-                ? TransformType.TABLE_REPLACED
-                : TransformType.TABLE_ADDED;
-            // Copy csv input stream into the zip file, replacing it if it already exists.
-            Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
-            zipTarget.feedTransformResult.tableTransformResults.add(new TableTransformResult(tableName, type));
-        } catch (Exception e) {
-            status.fail("Unknown error encountered while transforming zip file", e);
-        }
-    }
+public class ReplaceFileFromStringTransformation extends StringTransformation {
 }
diff --git a/src/main/java/com/conveyal/datatools/manager/models/transform/StringTransformation.java b/src/main/java/com/conveyal/datatools/manager/models/transform/StringTransformation.java
new file mode 100644
index 000000000..c845d4f37
--- /dev/null
+++ b/src/main/java/com/conveyal/datatools/manager/models/transform/StringTransformation.java
@@ -0,0 +1,63 @@
+package com.conveyal.datatools.manager.models.transform;
+
+import com.conveyal.datatools.common.status.MonitorableJob;
+import com.conveyal.datatools.manager.models.TableTransformResult;
+import com.conveyal.datatools.manager.models.TransformType;
+
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.nio.file.FileSystem;
+import java.nio.file.FileSystems;
+import java.nio.file.Files;
+import java.nio.file.StandardCopyOption;
+
+public class StringTransformation extends ZipTransformation {
+
+    public static StringTransformation create(String csvData, String table) {
+        StringTransformation transformation = new StringTransformation();
+        transformation.csvData = csvData;
+        transformation.table = table;
+        return transformation;
+    }
+
+    @Override
+    public void validateParameters(MonitorableJob.Status status) {
+        if (csvData == null) {
+            status.fail("CSV data must not be null");
+        }
+    }
+
+    @Override
+    public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status status) {
+        String tableName = table + ".txt";
+        Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
+        try (
+            FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null);
+            InputStream inputStream = new ByteArrayInputStream(csvData.getBytes(StandardCharsets.UTF_8));
+        ) {
+            Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
+            TransformType type = Files.exists(targetTxtFilePath)
+                ? TransformType.TABLE_REPLACED
+                : TransformType.TABLE_ADDED;
+            // Copy csv input stream into the zip file, replacing it if it already exists.
+            Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
+            final int NEW_LINE_CHARACTER_CODE = 10;
+            int lineCount = (int) csvData.chars().filter(c -> c == NEW_LINE_CHARACTER_CODE).count();
+            int addedCount = type == TransformType.TABLE_ADDED ? lineCount : 0;
+            int updatedCount = type == TransformType.TABLE_MODIFIED ? lineCount : 0;
+            zipTarget.feedTransformResult.tableTransformResults.add(new TableTransformResult(
+                tableName,
+                type,
+                0,
+                updatedCount,
+                addedCount,
+                0
+            ));
+        } catch (Exception e) {
+            status.fail("Unknown error encountered while transforming zip file", e);
+        }
+    }
+}
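A quick worked example of the row accounting above (data is illustrative): the count is of newline characters, so the header line is excluded and only data rows are tallied.

    String csvData = "custom_column1,custom_column2"
        + "\ncustomValue1,customValue2"
        + "\ncustomValue3,customValue4";
    int lineCount = (int) csvData.chars().filter(c -> c == 10).count(); // 2
    // type is TABLE_ADDED when the file did not previously exist, so addedCount = 2;
    // updatedCount compares against TABLE_MODIFIED, a type this method never assigns.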
diff --git a/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java b/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java
index e37db9786..3bb40f228 100644
--- a/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java
+++ b/src/test/java/com/conveyal/datatools/manager/jobs/ArbitraryTransformJobTest.java
@@ -8,9 +8,12 @@
 import com.conveyal.datatools.manager.models.FeedVersion;
 import com.conveyal.datatools.manager.models.Project;
 import com.conveyal.datatools.manager.models.Snapshot;
+import com.conveyal.datatools.manager.models.TableTransformResult;
+import com.conveyal.datatools.manager.models.transform.AddCustomFileFromStringTransformation;
 import com.conveyal.datatools.manager.models.transform.DeleteRecordsTransformation;
 import com.conveyal.datatools.manager.models.transform.FeedTransformRules;
 import com.conveyal.datatools.manager.models.transform.FeedTransformation;
+import com.conveyal.datatools.manager.models.transform.PreserveCustomFieldsTransformation;
 import com.conveyal.datatools.manager.models.transform.ReplaceFileFromStringTransformation;
 import com.conveyal.datatools.manager.models.transform.ReplaceFileFromVersionTransformation;
 import com.conveyal.datatools.manager.persistence.Persistence;
@@ -22,11 +25,17 @@
 import org.junit.jupiter.api.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+import org.supercsv.io.CsvMapReader;
+import org.supercsv.prefs.CsvPreference;

+import java.io.File;
+import java.io.InputStream;
 import java.io.IOException;
+import java.io.InputStreamReader;
 import java.sql.SQLException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;
 import java.util.UUID;
 import java.util.zip.ZipEntry;
 import java.util.zip.ZipFile;
@@ -101,7 +110,7 @@ public void tearDownTest() {
      * into the target version's GTFS file.
      */
     @Test
-    public void canReplaceGtfsPlusFileFromVersion() throws IOException {
+    void canReplaceGtfsPlusFileFromVersion() throws IOException {
         final String table = "stop_attributes";
         // Create source version (folder contains stop_attributes file).
         sourceVersion = createFeedVersion(
@@ -126,7 +135,7 @@
     }

     @Test
-    public void canDeleteTrips() throws IOException {
+    void canDeleteTrips() throws IOException {
         // Add delete trips transformation.
         List<String> routeIds = new ArrayList<>();
         // Collect route_id values.
@@ -160,7 +169,7 @@
     }

     @Test
-    public void replaceGtfsPlusFileFailsIfSourceIsMissing() throws IOException {
+    void replaceGtfsPlusFileFailsIfSourceIsMissing() throws IOException {
         sourceVersion = createFeedVersion(
             feedSource,
             zipFolderFiles("fake-agency-with-only-calendar")
@@ -181,7 +190,7 @@
     }

     @Test
-    public void canReplaceFeedInfo() throws SQLException, IOException {
+    void canReplaceFeedInfo() throws SQLException, IOException {
         // Generate random UUID for feedId, which gets placed into the csv data.
         final String feedId = UUID.randomUUID().toString();
         final String feedInfoContent = generateFeedInfo(feedId);
@@ -215,6 +224,48 @@
         );
     }

+    @Test
+    void canPreserveCustomFieldsInStops() throws IOException {
+        String stops = generateStopsWithCustomFields();
+        FeedTransformation transformation = PreserveCustomFieldsTransformation.create(stops, "stops");
+        FeedTransformRules transformRules = new FeedTransformRules(transformation);
+        feedSource.transformRules.add(transformRules);
+        Persistence.feedSources.replace(feedSource.id, feedSource);
+        targetVersion = createFeedVersion(
+            feedSource,
+            zipFolderFiles("fake-agency-with-only-calendar-dates")
+        );
+        TableTransformResult transformResult = targetVersion.feedTransformResult.tableTransformResults.get(0);
+        assertEquals(
+            2,
+            transformResult.customColumnsAdded,
+            "stops.txt custom column count should equal input csv data # of custom columns"
+        );
+        assertEquals(
+            2,
+            transformResult.updatedCount,
+            "stops.txt row count modified with custom content should equal input csv data # of custom columns"
+        );
+    }
+
+    @Test
+    void canAddCustomFile() throws IOException {
+        String customCsv = generateCustomCsvData();
+        FeedTransformation transformation = AddCustomFileFromStringTransformation.create(customCsv, "custom-file");
+        FeedTransformRules transformRules = new FeedTransformRules(transformation);
+        feedSource.transformRules.add(transformRules);
+        Persistence.feedSources.replace(feedSource.id, feedSource);
+        targetVersion = createFeedVersion(
+            feedSource,
+            zipFolderFiles("fake-agency-with-only-calendar-dates")
+        );
+        assertEquals(
+            2,
+            targetVersion.feedTransformResult.tableTransformResults.get(0).addedCount,
+            "custom-file.txt custom row count should equal input csv data # of rows"
+        );
+    }
+
     private static String generateFeedInfo(String feedId) {
         // Add feed_info csv data (purposefully with two rows, even though this is not valid GTFS).
         return String.format(
@@ -222,4 +273,15 @@
             feedId
         );
     }
+    private static String generateStopsWithCustomFields() {
+        return "stop_id,custom_column1,custom_column2"
+            + "\n4u6g,customValue1,customValue2"
+            + "\n1234567,customValue3,customValue4";
+    }
+
+    private static String generateCustomCsvData() {
+        return "custom_column1,custom_column2,custom_column3"
+            + "\ncustomValue1,customValue2,customValue3"
+            + "\ncustomValue4,customValue5,customValue6";
+    }
 }
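Taken together, the tests wire a transformation into a feed source like this; a condensed sketch mirroring the test helpers above (feedSource, createFeedVersion, and zipFolderFiles come from the test fixture):

    // Preserve custom stop columns across a feed version update.
    String stops = generateStopsWithCustomFields();
    FeedTransformation transformation = PreserveCustomFieldsTransformation.create(stops, "stops");
    feedSource.transformRules.add(new FeedTransformRules(transformation));
    Persistence.feedSources.replace(feedSource.id, feedSource);
    // Creating the next version runs the transformation; results are reported in
    // targetVersion.feedTransformResult.tableTransformResults.
    FeedVersion targetVersion = createFeedVersion(
        feedSource,
        zipFolderFiles("fake-agency-with-only-calendar-dates")
    );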