Skip to content

Commit

Permalink
Support 'NULL' as Null in the CSV parser.
Browse files: browse the repository at this point in the history.
  • Loading branch information
dhegberg committed Nov 1, 2024
1 parent 752561a commit 8e69bed
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 3 deletions.
1 change: 1 addition & 0 deletions datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ parquet = { workspace = true, optional = true, default-features = true }
paste = "1.0.15"
pin-project-lite = "^0.2.7"
rand = { workspace = true }
regex = { workspace = true }
sqlparser = { workspace = true }
tempfile = { workspace = true }
tokio = { workspace = true }
Expand Down
10 changes: 7 additions & 3 deletions datafusion/core/src/datasource/file_format/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ use datafusion_physical_expr_common::sort_expr::LexRequirement;
use futures::stream::BoxStream;
use futures::{pin_mut, Stream, StreamExt, TryStreamExt};
use object_store::{delimited::newline_delimited_stream, ObjectMeta, ObjectStore};
use regex::Regex;

#[derive(Default)]
/// Factory struct used to create [CsvFormatFactory]
Expand Down Expand Up @@ -454,7 +455,8 @@ impl CsvFormat {
.has_header
.unwrap_or(state.config_options().catalog.has_header),
)
.with_delimiter(self.options.delimiter);
.with_delimiter(self.options.delimiter)
.with_null_regex(Regex::new(r"^NULL$|^$").unwrap());

if let Some(comment) = self.options.comment {
format = format.with_comment(comment);
Expand Down Expand Up @@ -760,7 +762,7 @@ mod tests {

let projection = None;
let exec =
get_exec(&state, "aggregate_test_100.csv", projection, None, true).await?;
get_exec(&state, "aggregate_test_100_with_nulls.csv", projection, None, true).await?;

let x: Vec<String> = exec
.schema()
Expand All @@ -782,7 +784,9 @@ mod tests {
"c10: Utf8",
"c11: Float64",
"c12: Float64",
"c13: Utf8"
"c13: Utf8",
"c14: Null",
"c15: Null"
],
x
);
Expand Down

0 comments on commit 8e69bed

Please sign in to comment.