Skip to content

Commit

Permalink
Remove invisible characters when parsing feed ids
Browse files Browse the repository at this point in the history
  • Loading branch information
leonardehrenfried committed Feb 23, 2024
1 parent 7be0113 commit ec2e9cd
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 2 deletions.
20 changes: 20 additions & 0 deletions src/main/java/org/opentripplanner/framework/lang/StringUtils.java
Original file line number Diff line number Diff line change
@@ -1,11 +1,20 @@
package org.opentripplanner.framework.lang;

import java.util.regex.Pattern;
import javax.annotation.Nonnull;

/**
* OTP string utilities that complement the methods of {@link java.lang.String}.
*/
public class StringUtils {
/**
* Regex to find unprintable characters like newlines and 'ZERO WIDTH SPACE' (U+200B).
* <p>
* {@code \p{C}} is the Unicode general category "Other", which covers control and format
* characters among others; ordinary spaces belong to a different category and do not match.
*/
private static final String INVISIBLE_CHARS_REGEX = "\\p{C}";
/**
* Compiled form of {@link #INVISIBLE_CHARS_REGEX}, built once when the class is loaded and
* then reused. Patterns are immutable and thread safe.
*/
private static final Pattern INVISIBLE_CHARS_PATTERN = Pattern.compile(INVISIBLE_CHARS_REGEX);

/** Private constructor: this class cannot be instantiated from outside. */
private StringUtils() {}

Expand Down Expand Up @@ -119,4 +128,15 @@ public static String quoteReplace(@Nonnull String text) {
public static String kebabCase(String input) {
  // Lower-case with Locale.ROOT so the result never depends on the JVM's default locale.
  // With a Turkish default locale, the no-arg toLowerCase() maps 'I' to dotless 'ı', which
  // would turn e.g. "TITLE_ID" into "tıtle-ıd" instead of "title-id".
  // After lower-casing, every underscore is replaced by a hyphen.
  return input.toLowerCase(java.util.Locale.ROOT).replace('_', '-');
}

/**
 * Strips every character without a visual representation of its own from the given text:
 * control characters such as newlines and tabs, as well as invisible characters like
 * 'ZERO WIDTH SPACE' (U+200B).
 * <p>
 * Whitespace that occupies visible room, for example U+0020 and U+2000, counts as visible
 * and is left in place.
 *
 * @param input the text to clean
 * @return the text with every match of the invisible-character pattern deleted
 */
public static String removeInvisibleChars(String input) {
  var invisibleChars = INVISIBLE_CHARS_PATTERN.matcher(input);
  return invisibleChars.replaceAll("");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
import java.io.Serializable;
import java.util.Arrays;
import java.util.List;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import org.opentripplanner.framework.lang.StringUtils;

public final class FeedScopedId implements Serializable, Comparable<FeedScopedId> {

Expand Down Expand Up @@ -59,7 +59,12 @@ public static FeedScopedId parse(String value) throws IllegalArgumentException {
* Parses a string consisting of concatenated FeedScopedIds to a List
*/
public static List<FeedScopedId> parseList(String s) {
  // Each comma-separated token is cleaned before it is parsed: invisible characters
  // (e.g. U+200B, newlines, tabs) are removed anywhere in the token and surrounding
  // whitespace is stripped.
  return Arrays
    .stream(s.split(","))
    .map(token -> StringUtils.removeInvisibleChars(token).strip())
    // Tokens that are empty after cleaning (e.g. from ",," or a trailing ", ") are skipped
    // rather than handed to parse().
    .filter(token -> !token.isBlank())
    .map(FeedScopedId::parse)
    // Stream.toList() yields an unmodifiable list.
    .toList();
}

public static boolean isValidString(String value) throws IllegalArgumentException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.Arguments;
import org.junit.jupiter.params.provider.ValueSource;
import org.opentripplanner.test.support.VariableSource;

class StringUtilsTest {
Expand Down Expand Up @@ -82,4 +83,16 @@ void padRight() {
void quoteReplace() {
  // Single quotes in the input are expected to come back as double quotes.
  String quoted = StringUtils.quoteReplace("'key' : 'value'");
  assertEquals("\"key\" : \"value\"", quoted);
}

@ParameterizedTest
@ValueSource(strings = { "\u200B", "\n", "\t" })
void removeInvisibleChars(String input) {
  // Every input consists of exactly one invisible character, so nothing may remain.
  String cleaned = StringUtils.removeInvisibleChars(input);
  assertEquals("", cleaned);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,37 @@
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertNull;

import java.util.List;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.ValueSource;

/**
 * Unit tests for {@link FeedScopedId}: nullable construction and parsing of id lists.
 */
class FeedScopedIdTest {

  /** The single-element list every {@code parseList} input below must be parsed into. */
  private static final List<FeedScopedId> TRIMET_123 = List.of(new FeedScopedId("trimet", "123"));

  /** A feed id plus an id yields a scoped id; a {@code null} id yields {@code null}. */
  @Test
  void ofNullable() {
    FeedScopedId expected = new FeedScopedId("FEED", "ID");
    assertEquals(expected, FeedScopedId.ofNullable("FEED", "ID"));

    FeedScopedId withoutId = FeedScopedId.ofNullable("FEED", null);
    assertNull(withoutId);
  }

  /**
   * Stray commas, surrounding whitespace and invisible characters (zero width space,
   * newline, tab) anywhere in the input must not affect the parsed result.
   */
  @ParameterizedTest
  @ValueSource(
    strings = {
      "trimet:123",
      "trimet:123 ",
      "trimet:123, ",
      ",trimet:123 , ",
      ",trimet:123 , ,\u200B,",
      " trimet:123 ",
      "\u200Btrimet:123",
      "\u200B\u200Btri\u200Bmet:123\u200B",
      "\ntrimet:123\t",
      "\ntri\nmet:123\t",
    }
  )
  void parseList(String input) {
    List<FeedScopedId> parsed = FeedScopedId.parseList(input);
    assertEquals(TRIMET_123, parsed);
  }
}

0 comments on commit ec2e9cd

Please sign in to comment.