diff --git a/release-notes/VERSION b/release-notes/VERSION index 8be0571..1d7be43 100644 --- a/release-notes/VERSION +++ b/release-notes/VERSION @@ -9,7 +9,7 @@ Project: jackson-dataformat-csv #50: Support `JsonGenerator.Feature.IGNORE_KNOWN` for CSV, to ignoring extra columns #53: Add a way to specify "null value" (String) for `CsvGenerator` to use when writing `null`s (part of `CsvSchema`; method `withNullValue()`) - +#57: Support simple array types 2.4.4 (not yet released) diff --git a/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java b/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java index be2f139..24d76ce 100644 --- a/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java +++ b/src/main/java/com/fasterxml/jackson/dataformat/csv/CsvParser.java @@ -129,7 +129,16 @@ private Feature(boolean defaultState) { * end-array is returned. */ protected final static int STATE_UNNAMED_VALUE = 4; - + + /** + * State in which a column value has been determined to be of + * an array type, and will need to be split into multiple + * values. This can currently only occur for named values. + * + * @since 2.5 + */ + protected final static int STATE_IN_ARRAY = 5; + /** * State in which end marker is returned; either * null (if no array wrapping), or @@ -137,8 +146,8 @@ private Feature(boolean defaultState) { * This step will loop, returning series of nulls * if {@link #nextToken} is called multiple times. */ - protected final static int STATE_DOC_END = 5; - + protected final static int STATE_DOC_END = 6; + /* /********************************************************************** /* Configuration @@ -150,7 +159,7 @@ private Feature(boolean defaultState) { */ protected ObjectCodec _objectCodec; - protected int _csvFeatures; + protected int _formatFeatures; /** * Definition of columns being read. Initialized to "empty" instance, which @@ -206,6 +215,18 @@ private Feature(boolean defaultState) { */ protected byte[] _binaryValue; + /** + * Pointer to the first character of the next array value to return. + */ + protected int _arrayValueStart; + + /** + * Contents of the cell, to be split into distinct array values. + */ + protected String _arrayValue; + + protected char _arraySeparator; + /* /********************************************************************** /* Helper objects @@ -240,7 +261,7 @@ public CsvParser(IOContext ctxt, BufferRecycler br, _textBuffer = new TextBuffer(br); DupDetector dups = JsonParser.Feature.STRICT_DUPLICATE_DETECTION.enabledIn(parserFeatures) ? DupDetector.rootDetector(this) : null; - _csvFeatures = csvFeatures; + _formatFeatures = csvFeatures; _parsingContext = JsonReadContext.createRootContext(dups); _reader = new CsvDecoder(this, ctxt, reader, _schema, _textBuffer, isEnabled(JsonParser.Feature.AUTO_CLOSE_SOURCE), @@ -316,7 +337,7 @@ public int releaseBuffered(Writer out) throws IOException { */ public JsonParser enable(Feature f) { - _csvFeatures |= f.getMask(); + _formatFeatures |= f.getMask(); return this; } @@ -326,7 +347,7 @@ public JsonParser enable(Feature f) */ public JsonParser disable(Feature f) { - _csvFeatures &= ~f.getMask(); + _formatFeatures &= ~f.getMask(); return this; } @@ -349,7 +370,7 @@ public JsonParser configure(Feature f, boolean state) * is enabled. */ public boolean isEnabled(Feature f) { - return (_csvFeatures & f.getMask()) != 0; + return (_formatFeatures & f.getMask()) != 0; } /** @@ -413,13 +434,15 @@ public boolean isExpectedStartArrayToken() { case JsonTokenId.ID_START_ARRAY: return true; } - // Otherwise: may coerce into array, unless column type prevents it - - // !!! TODO - + // Otherwise: may coerce into array, iff we have essentially "untyped" column + CsvSchema.Column column = _schema.column(_columnIndex); + if (column.getType() == CsvSchema.ColumnType.STRING) { + _startArray(column); + return true; + } return false; } - + @Override public String getCurrentName() throws IOException { return _currentName; @@ -445,6 +468,8 @@ public JsonToken nextToken() throws IOException return (_currToken = _handleNamedValue()); case STATE_UNNAMED_VALUE: return (_currToken = _handleUnnamedValue()); + case STATE_IN_ARRAY: + return (_currToken = _handleArrayValue()); case STATE_DOC_END: _reader.close(); if (_parsingContext.inRoot()) { @@ -483,17 +508,18 @@ protected JsonToken _handleStartDoc() throws IOException /* Only one real complication, actually; empy documents (zero bytes). * Those have no entries. Should be easy enough to detect like so: */ + final boolean wrapAsArray = Feature.WRAP_AS_ARRAY.enabledIn(_formatFeatures); if (!_reader.hasMoreInput()) { _state = STATE_DOC_END; // but even empty sequence must still be wrapped in logical array - if (isEnabled(Feature.WRAP_AS_ARRAY)) { + if (wrapAsArray) { _parsingContext = _reader.childArrayContext(_parsingContext); return JsonToken.START_ARRAY; } return null; } - if (isEnabled(Feature.WRAP_AS_ARRAY)) { + if (wrapAsArray) { _parsingContext = _reader.childArrayContext(_parsingContext); _state = STATE_RECORD_START; return JsonToken.START_ARRAY; @@ -573,8 +599,13 @@ protected JsonToken _handleNextEntryExpectEOL() throws IOException protected JsonToken _handleNamedValue() throws IOException { - _state = STATE_NEXT_ENTRY; + CsvSchema.Column column = _schema.column(_columnIndex); ++_columnIndex; + if (column.isArray()) { + _startArray(column); + return JsonToken.START_ARRAY; + } + _state = STATE_NEXT_ENTRY; return JsonToken.VALUE_STRING; } @@ -597,6 +628,26 @@ protected JsonToken _handleUnnamedValue() throws IOException return JsonToken.VALUE_STRING; } + protected JsonToken _handleArrayValue() throws IOException + { + int offset = _arrayValueStart; + if (offset < 0) { // just returned last value + _parsingContext = _parsingContext.getParent(); + // no arrays in arrays (at least for now), so must be back to named value + _state = STATE_NEXT_ENTRY; + return JsonToken.END_ARRAY; + } + int end = _arrayValue.indexOf(_arraySeparator, offset); + if (end < 0) { // last value + _currentValue = (offset == 0) ? _arrayValue : _arrayValue.substring(offset); + _arrayValueStart = end; + } else { + _currentValue = _arrayValue.substring(offset, end); + _arrayValueStart = end+1; + } + return JsonToken.VALUE_STRING; + } + /** * Method called to process the expected header line */ @@ -738,27 +789,27 @@ public int getIntValue() throws IOException { public long getLongValue() throws IOException { return _reader.getLongValue(); } - + @Override public BigInteger getBigIntegerValue() throws IOException { return _reader.getBigIntegerValue(); } - + @Override public float getFloatValue() throws IOException { return _reader.getFloatValue(); } - + @Override public double getDoubleValue() throws IOException { return _reader.getDoubleValue(); } - + @Override public BigDecimal getDecimalValue() throws IOException { return _reader.getDecimalValue(); } - + /* /********************************************************************** /* Helper methods from base class @@ -801,4 +852,19 @@ public ByteArrayBuilder _getByteArrayBuilder() } return _byteArrayBuilder; } + + protected void _startArray(CsvSchema.Column column) + { + _currToken = JsonToken.START_ARRAY; + _parsingContext = _parsingContext.createChildArrayContext(_reader.getCurrentRow(), + _reader.getCurrentColumn()); + _state = STATE_IN_ARRAY; + _arrayValueStart = 0; + _arrayValue = _currentValue; + int sep = column.getArrayElementSeparator(); + if (sep <= 0) { + sep = _schema.getArrayElementSeparator(); + } + _arraySeparator = (char) sep; + } } diff --git a/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java b/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java index 52e337d..a3afc32 100644 --- a/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java +++ b/src/main/java/com/fasterxml/jackson/dataformat/csv/impl/CsvDecoder.java @@ -385,6 +385,20 @@ public JsonLocation getCurrentLocation() return new JsonLocation(_inputSource, _currInputProcessed + ptr - 1, _currInputRow, col); } + + public final int getCurrentRow() { + return _currInputRow; + } + + public final int getCurrentColumn() { + int ptr = _inputPtr; + // One twist: when dealing with a "pending LF", need to + // go back one position when calculating location + if (_pendingLF > 1) { // 1 is used as marker for end-of-input + --ptr; + } + return ptr - _currInputRowStart + 1; // 1-based + } /* /********************************************************************** diff --git a/src/test/java/com/fasterxml/jackson/dataformat/csv/failing/ArrayReadTest.java b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/ArrayReadTest.java similarity index 66% rename from src/test/java/com/fasterxml/jackson/dataformat/csv/failing/ArrayReadTest.java rename to src/test/java/com/fasterxml/jackson/dataformat/csv/deser/ArrayReadTest.java index d521d3c..7bc3fa0 100644 --- a/src/test/java/com/fasterxml/jackson/dataformat/csv/failing/ArrayReadTest.java +++ b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/ArrayReadTest.java @@ -1,4 +1,4 @@ -package com.fasterxml.jackson.dataformat.csv.failing; +package com.fasterxml.jackson.dataformat.csv.deser; import com.fasterxml.jackson.annotation.JsonPropertyOrder; @@ -19,10 +19,26 @@ static class ValueEntry { /********************************************************************** */ - public void testSimpleExplicit() throws Exception + private final CsvMapper MAPPER = mapperForCsv(); + + public void testSimpleExplicitLooseTyping() throws Exception { - CsvMapper mapper = mapperForCsv(); - ValueEntry value = mapper.readerWithSchemaFor(ValueEntry.class) + ValueEntry value = MAPPER.readerWithSchemaFor(ValueEntry.class) + .readValue("foo,1;2;3,stuff"); + assertNotNull(value); + assertEquals("foo", value.id); + assertEquals("stuff", value.extra); + int[] v = value.values; + assertNotNull(v); + assertEquals(3, v.length); + assertEquals(1, v[0]); + assertEquals(2, v[1]); + assertEquals(3, v[2]); + } + + public void testSimpleExplicitStrictTyping() throws Exception + { + ValueEntry value = MAPPER.readerWithTypedSchemaFor(ValueEntry.class) .readValue("foo,1;2;3,stuff"); assertNotNull(value); assertEquals("foo", value.id); diff --git a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/ParserTrimSpacesTest.java b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/ParserTrimSpacesTest.java index f9b2781..e0e930f 100644 --- a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/ParserTrimSpacesTest.java +++ b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/ParserTrimSpacesTest.java @@ -1,11 +1,9 @@ package com.fasterxml.jackson.dataformat.csv.deser; import com.fasterxml.jackson.annotation.JsonPropertyOrder; + import com.fasterxml.jackson.databind.MappingIterator; -import com.fasterxml.jackson.dataformat.csv.CsvMapper; -import com.fasterxml.jackson.dataformat.csv.CsvParser; -import com.fasterxml.jackson.dataformat.csv.ModuleTestBase; -import com.fasterxml.jackson.dataformat.csv.CsvParser.Feature; +import com.fasterxml.jackson.dataformat.csv.*; public class ParserTrimSpacesTest extends ModuleTestBase { diff --git a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserNoSchema.java b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserNoSchema.java index 4cdeddd..ce2ea24 100644 --- a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserNoSchema.java +++ b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserNoSchema.java @@ -3,10 +3,7 @@ import java.util.List; import com.fasterxml.jackson.databind.MappingIterator; -import com.fasterxml.jackson.dataformat.csv.CsvMapper; -import com.fasterxml.jackson.dataformat.csv.CsvParser; -import com.fasterxml.jackson.dataformat.csv.ModuleTestBase; -import com.fasterxml.jackson.dataformat.csv.CsvParser.Feature; +import com.fasterxml.jackson.dataformat.csv.*; /** * Test to verify that CSV content can be parsed without schema diff --git a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserQuotes.java b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserQuotes.java index c6d0432..a87be16 100644 --- a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserQuotes.java +++ b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserQuotes.java @@ -2,11 +2,7 @@ import com.fasterxml.jackson.annotation.JsonPropertyOrder; import com.fasterxml.jackson.databind.*; -import com.fasterxml.jackson.dataformat.csv.CsvMapper; -import com.fasterxml.jackson.dataformat.csv.CsvParser; -import com.fasterxml.jackson.dataformat.csv.CsvSchema; -import com.fasterxml.jackson.dataformat.csv.ModuleTestBase; -import com.fasterxml.jackson.dataformat.csv.CsvParser.Feature; +import com.fasterxml.jackson.dataformat.csv.*; public class TestParserQuotes extends ModuleTestBase { diff --git a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserSequences.java b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserSequences.java index 28547ba..9fd485e 100644 --- a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserSequences.java +++ b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserSequences.java @@ -6,11 +6,7 @@ import com.fasterxml.jackson.annotation.JsonPropertyOrder; import com.fasterxml.jackson.databind.MappingIterator; import com.fasterxml.jackson.databind.ObjectWriter; -import com.fasterxml.jackson.dataformat.csv.CsvMapper; -import com.fasterxml.jackson.dataformat.csv.CsvParser; -import com.fasterxml.jackson.dataformat.csv.CsvSchema; -import com.fasterxml.jackson.dataformat.csv.ModuleTestBase; -import com.fasterxml.jackson.dataformat.csv.CsvParser.Feature; +import com.fasterxml.jackson.dataformat.csv.*; /** * Tests for verifying behavior of enclosing input stream as diff --git a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserStrictQuoting.java b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserStrictQuoting.java index eff7ee6..44c05b6 100644 --- a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserStrictQuoting.java +++ b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserStrictQuoting.java @@ -1,11 +1,7 @@ package com.fasterxml.jackson.dataformat.csv.deser; import com.fasterxml.jackson.annotation.JsonPropertyOrder; -import com.fasterxml.jackson.dataformat.csv.CsvGenerator; -import com.fasterxml.jackson.dataformat.csv.CsvMapper; -import com.fasterxml.jackson.dataformat.csv.CsvSchema; -import com.fasterxml.jackson.dataformat.csv.ModuleTestBase; -import com.fasterxml.jackson.dataformat.csv.CsvGenerator.Feature; +import com.fasterxml.jackson.dataformat.csv.*; // Tests for [Issue#26] public class TestParserStrictQuoting extends ModuleTestBase diff --git a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserWithHeader.java b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserWithHeader.java index c51818c..b65a0bf 100644 --- a/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserWithHeader.java +++ b/src/test/java/com/fasterxml/jackson/dataformat/csv/deser/TestParserWithHeader.java @@ -5,13 +5,7 @@ import com.fasterxml.jackson.annotation.JsonPropertyOrder; import com.fasterxml.jackson.core.JsonToken; import com.fasterxml.jackson.databind.MappingIterator; -import com.fasterxml.jackson.dataformat.csv.CsvFactory; -import com.fasterxml.jackson.dataformat.csv.CsvMapper; -import com.fasterxml.jackson.dataformat.csv.CsvParser; -import com.fasterxml.jackson.dataformat.csv.CsvSchema; -import com.fasterxml.jackson.dataformat.csv.ModuleTestBase; -import com.fasterxml.jackson.dataformat.csv.CsvParser.Feature; -import com.fasterxml.jackson.dataformat.csv.CsvSchema.Column; +import com.fasterxml.jackson.dataformat.csv.*; public class TestParserWithHeader extends ModuleTestBase {