Skip to content

Commit

Permalink
Merge branch 'ibi-group:dev' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
mvanlaar authored Jul 10, 2023
2 parents bb39907 + 568d9fd commit 45dec10
Show file tree
Hide file tree
Showing 8 changed files with 317 additions and 61 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@
<groupId>com.github.conveyal</groupId>
<artifactId>gtfs-lib</artifactId>
<!-- Latest dev build on jitpack.io -->
<version>a3e5707</version>
<version>f2ceb59</version>
<!-- Exclusions added in order to silence SLF4J warnings about multiple bindings:
http://www.slf4j.org/codes.html#multiple_bindings
-->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ public class TableTransformResult implements Serializable {
public int deletedCount;
public int updatedCount;
public int addedCount;
public int customColumnsAdded;
public TransformType transformType;
public String tableName;

Expand All @@ -21,11 +22,27 @@ public TableTransformResult(String tableName, TransformType transformType) {
this.transformType = transformType;
}

/**
 * Fully-specified result of a single table transformation.
 *
 * @param tableName          name of the GTFS table file the transformation touched (e.g. "stops.txt")
 * @param transformType      kind of change applied (e.g. TABLE_MODIFIED, TABLE_REPLACED, TABLE_ADDED)
 * @param deletedCount       number of rows deleted from the table
 * @param updatedCount       number of rows updated in the table
 * @param addedCount         number of rows added to the table
 * @param customColumnsAdded number of non-spec (custom) columns added to the table
 */
public TableTransformResult(
    String tableName,
    TransformType transformType,
    int deletedCount,
    int updatedCount,
    int addedCount,
    int customColumnsAdded
) {
    this.tableName = tableName;
    this.transformType = transformType;
    this.deletedCount = deletedCount;
    this.updatedCount = updatedCount;
    this.addedCount = addedCount;
    this.customColumnsAdded = customColumnsAdded;
}

/**
 * Convenience constructor for a row-level modification result. The transform type is fixed to
 * TABLE_MODIFIED; customColumnsAdded is left at its field default (0).
 *
 * @param tableName    name of the GTFS table file the transformation touched
 * @param deletedCount number of rows deleted from the table
 * @param updatedCount number of rows updated in the table
 * @param addedCount   number of rows added to the table
 */
public TableTransformResult(String tableName, int deletedCount, int updatedCount, int addedCount) {
    this.tableName = tableName;
    this.transformType = TransformType.TABLE_MODIFIED;
    this.deletedCount = deletedCount;
    this.updatedCount = updatedCount;
    this.addedCount = addedCount;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package com.conveyal.datatools.manager.models.transform;

import com.conveyal.datatools.common.status.MonitorableJob;

/**
 * Zip transformation that adds a brand-new, non-spec CSV file to the target GTFS feed
 * from a caller-supplied string. Inherits the actual write logic from {@link StringTransformation};
 * this subclass only customizes table-name validation.
 */
public class AddCustomFileFromStringTransformation extends StringTransformation {

    /**
     * Factory kept distinct from StringTransformation#create so the created instance has the
     * concrete AddCustomFile type — otherwise table-name validation tests would not exercise
     * this subclass's rules.
     */
    public static AddCustomFileFromStringTransformation create(String csvData, String table) {
        AddCustomFileFromStringTransformation result = new AddCustomFileFromStringTransformation();
        result.table = table;
        result.csvData = csvData;
        return result;
    }

    @Override
    public void validateTableName(MonitorableJob.Status status) {
        // Table names are supplied without an extension; the ".txt" suffix is appended later.
        boolean containsExtension = table.contains(".txt");
        if (containsExtension) {
            status.fail("CSV Table name should not contain .txt");
        }
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,9 @@
@JsonSubTypes.Type(value = DeleteRecordsTransformation.class, name = "DeleteRecordsTransformation"),
@JsonSubTypes.Type(value = NormalizeFieldTransformation.class, name = "NormalizeFieldTransformation"),
@JsonSubTypes.Type(value = ReplaceFileFromVersionTransformation.class, name = "ReplaceFileFromVersionTransformation"),
@JsonSubTypes.Type(value = ReplaceFileFromStringTransformation.class, name = "ReplaceFileFromStringTransformation")
@JsonSubTypes.Type(value = ReplaceFileFromStringTransformation.class, name = "ReplaceFileFromStringTransformation"),
@JsonSubTypes.Type(value = PreserveCustomFieldsTransformation.class, name = "PreserveCustomFieldsTransformation"),
@JsonSubTypes.Type(value = AddCustomFileFromStringTransformation.class, name = "AddCustomFileTransformation")
})
public abstract class FeedTransformation<Target extends FeedTransformTarget> implements Serializable {
private static final long serialVersionUID = 1L;
Expand Down Expand Up @@ -68,6 +70,8 @@ public void doTransform(FeedTransformTarget target, MonitorableJob.Status status
status.fail(
String.format("Transformation must be of type '%s'.", getTransformationTypeName())
);
} catch (Exception e) {
status.fail(e.toString());
}
}

Expand All @@ -78,7 +82,7 @@ public void doTransform(FeedTransformTarget target, MonitorableJob.Status status
* @param target The database-bound or ZIP-file-bound target the transformation will operate on.
* @param status Used to report success or failure status and details.
*/
public abstract void transform(Target target, MonitorableJob.Status status);
public abstract void transform(Target target, MonitorableJob.Status status) throws Exception;

/**
* At the moment, used by DbTransformation to validate field names.
Expand All @@ -98,7 +102,6 @@ protected void validateTableName(MonitorableJob.Status status) {
// Validate fields before running transform.
if (GtfsUtils.getGtfsTable(table) == null) {
status.fail("Table must be valid GTFS spec table name (without .txt).");
return;
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
package com.conveyal.datatools.manager.models.transform;

import com.conveyal.datatools.common.status.MonitorableJob;
import com.conveyal.datatools.manager.models.TableTransformResult;
import com.conveyal.datatools.manager.models.TransformType;
import com.conveyal.gtfs.loader.Table;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.*;
import java.util.*;
import java.util.stream.Collectors;

import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.supercsv.io.CsvMapReader;
import org.supercsv.io.CsvMapWriter;
import org.supercsv.prefs.CsvPreference;


/**
* This feed transformation will attempt to preserve any custom fields from an entered csv in the final GTFS output.
*/
/**
 * This feed transformation will attempt to preserve any custom fields from an entered csv in the final GTFS output.
 *
 * The entered CSV is matched row-by-row against the editor-produced table using the table's
 * primary key(s); any columns in the entered CSV that are not part of the GTFS spec are carried
 * over into the output table.
 */
public class PreserveCustomFieldsTransformation extends ZipTransformation {
    /** no-arg constructor for de/serialization */
    public PreserveCustomFieldsTransformation() {}

    /**
     * Factory for programmatic construction (e.g. in tests).
     *
     * @param csvData CSV contents (with header) holding the custom fields to preserve.
     * @param table   GTFS table name without the .txt extension.
     */
    public static PreserveCustomFieldsTransformation create(String csvData, String table) {
        PreserveCustomFieldsTransformation transformation = new PreserveCustomFieldsTransformation();
        transformation.csvData = csvData;
        transformation.table = table;
        return transformation;
    }

    @Override
    public void validateParameters(MonitorableJob.Status status) {
        if (csvData == null) {
            status.fail("CSV data must not be null (delete table not yet supported)");
        }
    }

    /**
     * This method creates a hash map of the GTFS table keys to the custom CSV values for efficient lookup of custom values.
     * The hash map key is the key values of the GTFS table (e.g. stop_id for stops) concatenated by an underscore.
     * The hash map value is the CsvMapReader (mapping of column to row value).
     */
    private static HashMap<String, Map<String, String>> createCsvHashMap(CsvMapReader reader, String[] headers, List<String> tablePrimaryKeys) throws IOException {
        HashMap<String, Map<String, String>> lookup = new HashMap<>();
        Map<String, String> nextLine;
        while ((nextLine = reader.read(headers)) != null) {
            List<String> customCsvKeyValues = tablePrimaryKeys.stream().map(nextLine::get).collect(Collectors.toList());
            String hashKey = StringUtils.join(customCsvKeyValues, "_");
            lookup.put(hashKey, nextLine);
        }
        return lookup;
    }

    @Override
    public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status status) throws Exception {
        String tableName = table + ".txt";
        Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
        // Look up the GTFS spec definition for the requested table (needed for spec fields and keys).
        Optional<Table> streamResult = Arrays.stream(Table.tablesInOrder)
            .filter(t -> t.name.equals(table))
            .findFirst();

        if (!streamResult.isPresent()) {
            throw new Exception(String.format("could not find specTable for table %s", table));
        }
        Table specTable = streamResult.get();

        try (FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null)) {
            List<String> specTableFields = specTable.specFields().stream().map(f -> f.name).collect(Collectors.toList());
            List<String> tablePrimaryKeys = specTable.getPrimaryKeyNames();

            Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);

            // FIX: the previous version also created an unused "-temp" file that was never
            // written to, leaking an empty temp file per run; only the staging output is needed.
            File output = File.createTempFile(tableName + "-output-temp", ".txt");
            // Register cleanup immediately so the temp file is reclaimed even if we return early
            // (no custom fields) or an exception is thrown below.
            output.deleteOnExit();
            int rowsModified = 0;
            List<String> customFields;

            try (
                InputStream is = Files.newInputStream(targetTxtFilePath);
                CsvMapReader customFileReader = new CsvMapReader(new StringReader(csvData), CsvPreference.STANDARD_PREFERENCE);
                // FIX: GTFS files are UTF-8 by spec — read/write with an explicit charset instead
                // of the platform default (FileWriter/InputStreamReader without a charset).
                CsvMapReader editorFileReader = new CsvMapReader(new InputStreamReader(is, StandardCharsets.UTF_8), CsvPreference.STANDARD_PREFERENCE);
                CsvMapWriter writer = new CsvMapWriter(new OutputStreamWriter(Files.newOutputStream(output.toPath()), StandardCharsets.UTF_8), CsvPreference.STANDARD_PREFERENCE)
            ) {
                String[] customHeaders = customFileReader.getHeader(true);
                String[] editorHeaders = editorFileReader.getHeader(true);

                // Custom fields are headers present in the uploaded CSV but absent from the GTFS spec.
                customFields = Arrays.stream(customHeaders).filter(h -> !specTableFields.contains(h)).collect(Collectors.toList());
                if (customFields.isEmpty()) return;
                String[] fullHeaders = ArrayUtils.addAll(editorHeaders, customFields.toArray(new String[0]));

                HashMap<String, Map<String, String>> customFieldsLookup = createCsvHashMap(customFileReader, customHeaders, tablePrimaryKeys);
                writer.writeHeader(fullHeaders);

                Map<String, String> row;
                while ((row = editorFileReader.read(editorHeaders)) != null) {
                    // Rebuild the primary-key hash for this editor row to find matching custom values.
                    List<String> editorCsvPrimaryKeyValues = tablePrimaryKeys.stream()
                        .map(row::get)
                        .collect(Collectors.toList());

                    String hashKey = StringUtils.join(editorCsvPrimaryKeyValues, "_");
                    Map<String, String> customCsvValues = customFieldsLookup.get(hashKey);
                    Map<String, String> finalRow = row;
                    // Append each custom column value (null when the custom CSV has no matching row).
                    customFields.forEach(customField -> {
                        String value = customCsvValues == null ? null : customCsvValues.get(customField);
                        finalRow.put(customField, value);
                    });
                    if (customCsvValues != null) rowsModified++;
                    writer.write(finalRow, fullHeaders);
                }
            }
            // Replace the table inside the zip with the merged output.
            Files.copy(output.toPath(), targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
            zipTarget.feedTransformResult.tableTransformResults.add(new TableTransformResult(
                tableName,
                TransformType.TABLE_MODIFIED,
                0,
                rowsModified,
                0,
                customFields.size()
            ));
        } catch (NoSuchFileException e) {
            status.fail("Source version does not contain table: " + tableName, e);
        } catch (IOException e) {
            status.fail("An exception occurred when writing output with custom fields", e);
        } catch (Exception e) {
            status.fail("Unknown error encountered while transforming zip file", e);
        }
    }
}
Original file line number Diff line number Diff line change
@@ -1,60 +1,9 @@
package com.conveyal.datatools.manager.models.transform;

import com.conveyal.datatools.common.status.MonitorableJob;
import com.conveyal.datatools.manager.models.TableTransformResult;
import com.conveyal.datatools.manager.models.TransformType;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;

/**
* This feed transformation will replace a file in the target zip (table) with the provided csv data.
*/
public class ReplaceFileFromStringTransformation extends ZipTransformation {

public static ReplaceFileFromStringTransformation create(String csvData, String table) {
ReplaceFileFromStringTransformation transformation = new ReplaceFileFromStringTransformation();
transformation.csvData = csvData;
transformation.table = table;
return transformation;
}

@Override
public void validateParameters(MonitorableJob.Status status) {
if (csvData == null) {
status.fail("CSV data must not be null (delete table not yet supported)");
}
}

@Override
public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status status) {
// if (csvData == null) {
// TODO: If this is a null value, delete the table (not yet supported).
// }

String tableName = table + ".txt";
// Run the replace transformation
Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
try( FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null) ){
// Convert csv data to input stream.
InputStream inputStream = new ByteArrayInputStream(csvData.getBytes(StandardCharsets.UTF_8));
Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
// Set transform type according to whether target file exists.
TransformType type = Files.exists(targetTxtFilePath)
? TransformType.TABLE_REPLACED
: TransformType.TABLE_ADDED;
// Copy csv input stream into the zip file, replacing it if it already exists.
Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
zipTarget.feedTransformResult.tableTransformResults.add(new TableTransformResult(tableName, type));
} catch (Exception e) {
status.fail("Unknown error encountered while transforming zip file", e);
}
}
/**
 * Feed transformation that replaces (or adds) a table file in the target GTFS zip using
 * caller-provided CSV data. All behavior is inherited from {@link StringTransformation};
 * this empty subclass is kept so the transformation retains its own JSON subtype name
 * (see the {@code @JsonSubTypes} registration in FeedTransformation) and stays
 * de/serialization-compatible with existing persisted configurations.
 */
public class ReplaceFileFromStringTransformation extends StringTransformation {
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
package com.conveyal.datatools.manager.models.transform;

import com.conveyal.datatools.common.status.MonitorableJob;
import com.conveyal.datatools.manager.models.TableTransformResult;
import com.conveyal.datatools.manager.models.TransformType;

import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;

/**
 * Zip transformation that writes a CSV string into the target GTFS zip as {@code table + ".txt"},
 * adding the file when absent or replacing it when present, and records the row counts in the
 * feed transform result.
 */
public class StringTransformation extends ZipTransformation {

    /**
     * Factory for programmatic construction (e.g. in tests).
     *
     * @param csvData CSV contents (with header) to write into the zip.
     * @param table   GTFS table name without the .txt extension.
     */
    public static StringTransformation create(String csvData, String table) {
        StringTransformation transformation = new StringTransformation();
        transformation.csvData = csvData;
        transformation.table = table;
        return transformation;
    }

    @Override
    public void validateParameters(MonitorableJob.Status status) {
        if (csvData == null) {
            status.fail("CSV data must not be null");
        }
    }

    @Override
    public void transform(FeedTransformZipTarget zipTarget, MonitorableJob.Status status) {
        String tableName = table + ".txt";
        Path targetZipPath = Paths.get(zipTarget.gtfsFile.getAbsolutePath());
        try (
            FileSystem targetZipFs = FileSystems.newFileSystem(targetZipPath, (ClassLoader) null);
            InputStream inputStream = new ByteArrayInputStream(csvData.getBytes(StandardCharsets.UTF_8));
        ) {
            Path targetTxtFilePath = getTablePathInZip(tableName, targetZipFs);
            // Record whether we are adding a brand-new table or replacing an existing one.
            TransformType type = Files.exists(targetTxtFilePath)
                ? TransformType.TABLE_REPLACED
                : TransformType.TABLE_ADDED;
            // Copy csv input stream into the zip file, replacing it if it already exists.
            Files.copy(inputStream, targetTxtFilePath, StandardCopyOption.REPLACE_EXISTING);
            // NOTE(review): this counts newline characters, so depending on whether csvData ends
            // with a trailing newline the header row may be included in the count — confirm intent.
            int lineCount = (int) csvData.chars().filter(c -> c == '\n').count();
            int addedCount = type == TransformType.TABLE_ADDED ? lineCount : 0;
            // FIX: previously compared against TransformType.TABLE_MODIFIED, a value this method
            // never assigns, so replacements always reported zero updated rows. A replaced table
            // is TABLE_REPLACED, set above.
            int updatedCount = type == TransformType.TABLE_REPLACED ? lineCount : 0;
            zipTarget.feedTransformResult.tableTransformResults.add(new TableTransformResult(
                tableName,
                type,
                0,
                updatedCount,
                addedCount,
                0
            ));
        } catch (Exception e) {
            status.fail("Unknown error encountered while transforming zip file", e);
        }
    }
}
Loading

0 comments on commit 45dec10

Please sign in to comment.