From 223bb02fce886b47dc1ac81e2eda2bd3c6d60c3e Mon Sep 17 00:00:00 2001 From: Bruce Ritchie Date: Tue, 29 Oct 2024 14:39:48 -0400 Subject: [PATCH] docs: switch completely to generated docs for scalar and aggregate functions (#13161) * Remove _new docs, update index, update docs build script to point to main .md files for aggregate & scalar function pages. * update documentation --- dev/update_function_docs.sh | 20 +- .../user-guide/sql/aggregate_functions.md | 839 +++- .../user-guide/sql/aggregate_functions_new.md | 865 ---- docs/source/user-guide/sql/index.rst | 2 - .../source/user-guide/sql/scalar_functions.md | 4334 +++++++++++++++- .../user-guide/sql/scalar_functions_new.md | 4365 ----------------- 6 files changed, 5125 insertions(+), 5300 deletions(-) delete mode 100644 docs/source/user-guide/sql/aggregate_functions_new.md delete mode 100644 docs/source/user-guide/sql/scalar_functions_new.md diff --git a/dev/update_function_docs.sh b/dev/update_function_docs.sh index 13bc22afcc13..ad3bc9c7f69c 100755 --- a/dev/update_function_docs.sh +++ b/dev/update_function_docs.sh @@ -24,7 +24,7 @@ SOURCE_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cd "${SOURCE_DIR}/../" && pwd -TARGET_FILE="docs/source/user-guide/sql/aggregate_functions_new.md" +TARGET_FILE="docs/source/user-guide/sql/aggregate_functions.md" PRINT_AGGREGATE_FUNCTION_DOCS_COMMAND="cargo run --manifest-path datafusion/core/Cargo.toml --bin print_functions_docs -- aggregate" echo "Inserting header" @@ -56,13 +56,7 @@ update documentation for an individual UDF or the dev/update_function_docs.sh file for updating surrounding text. --> -# Aggregate Functions (NEW) - -Note: this documentation is in the process of being migrated to be [automatically created from the codebase]. -Please see the [Aggregate Functions (old)](aggregate_functions.md) page for -the rest of the documentation. - -[automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740 +# Aggregate Functions Aggregate functions operate on a set of values to compute a single result. EOF @@ -75,7 +69,7 @@ npx prettier@2.3.2 --write "$TARGET_FILE" echo "'$TARGET_FILE' successfully updated!" -TARGET_FILE="docs/source/user-guide/sql/scalar_functions_new.md" +TARGET_FILE="docs/source/user-guide/sql/scalar_functions.md" PRINT_SCALAR_FUNCTION_DOCS_COMMAND="cargo run --manifest-path datafusion/core/Cargo.toml --bin print_functions_docs -- scalar" echo "Inserting header" @@ -107,13 +101,7 @@ update documentation for an individual UDF or the dev/update_function_docs.sh file for updating surrounding text. --> -# Scalar Functions (NEW) - -Note: this documentation is in the process of being migrated to be [automatically created from the codebase]. -Please see the [Scalar Functions (old)](aggregate_functions.md) page for -the rest of the documentation. - -[automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740 +# Scalar Functions EOF diff --git a/docs/source/user-guide/sql/aggregate_functions.md b/docs/source/user-guide/sql/aggregate_functions.md index 77f527c92cda..d9fc28a81772 100644 --- a/docs/source/user-guide/sql/aggregate_functions.md +++ b/docs/source/user-guide/sql/aggregate_functions.md @@ -17,6 +17,843 @@ under the License. --> + + # Aggregate Functions -Note: this documentation has been migrated to [Aggregate Functions (new)](aggregate_functions_new.md) +Aggregate functions operate on a set of values to compute a single result. + +## General Functions + +- [array_agg](#array_agg) +- [avg](#avg) +- [bit_and](#bit_and) +- [bit_or](#bit_or) +- [bit_xor](#bit_xor) +- [bool_and](#bool_and) +- [bool_or](#bool_or) +- [count](#count) +- [first_value](#first_value) +- [grouping](#grouping) +- [last_value](#last_value) +- [max](#max) +- [mean](#mean) +- [median](#median) +- [min](#min) +- [string_agg](#string_agg) +- [sum](#sum) +- [var](#var) +- [var_pop](#var_pop) +- [var_population](#var_population) +- [var_samp](#var_samp) +- [var_sample](#var_sample) + +### `array_agg` + +Returns an array created from the expression elements. If ordering is required, elements are inserted in the specified order. + +``` +array_agg(expression [ORDER BY expression]) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT array_agg(column_name ORDER BY other_column) FROM table_name; ++-----------------------------------------------+ +| array_agg(column_name ORDER BY other_column) | ++-----------------------------------------------+ +| [element1, element2, element3] | ++-----------------------------------------------+ +``` + +### `avg` + +Returns the average of numeric values in the specified column. + +``` +avg(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT avg(column_name) FROM table_name; ++---------------------------+ +| avg(column_name) | ++---------------------------+ +| 42.75 | ++---------------------------+ +``` + +#### Aliases + +- mean + +### `bit_and` + +Computes the bitwise AND of all non-null input values. + +``` +bit_and(expression) +``` + +#### Arguments + +- **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `bit_or` + +Computes the bitwise OR of all non-null input values. + +``` +bit_or(expression) +``` + +#### Arguments + +- **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `bit_xor` + +Computes the bitwise exclusive OR of all non-null input values. + +``` +bit_xor(expression) +``` + +#### Arguments + +- **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `bool_and` + +Returns true if all non-null input values are true, otherwise false. + +``` +bool_and(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT bool_and(column_name) FROM table_name; ++----------------------------+ +| bool_and(column_name) | ++----------------------------+ +| true | ++----------------------------+ +``` + +### `bool_or` + +Returns true if all non-null input values are true, otherwise false. + +``` +bool_and(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT bool_and(column_name) FROM table_name; ++----------------------------+ +| bool_and(column_name) | ++----------------------------+ +| true | ++----------------------------+ +``` + +### `count` + +Returns the number of non-null values in the specified column. To include null values in the total count, use `count(*)`. + +``` +count(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT count(column_name) FROM table_name; ++-----------------------+ +| count(column_name) | ++-----------------------+ +| 100 | ++-----------------------+ + +> SELECT count(*) FROM table_name; ++------------------+ +| count(*) | ++------------------+ +| 120 | ++------------------+ +``` + +### `first_value` + +Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group. + +``` +first_value(expression [ORDER BY expression]) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT first_value(column_name ORDER BY other_column) FROM table_name; ++-----------------------------------------------+ +| first_value(column_name ORDER BY other_column)| ++-----------------------------------------------+ +| first_element | ++-----------------------------------------------+ +``` + +### `grouping` + +Returns 1 if the data is aggregated across the specified column, or 0 if it is not aggregated in the result set. + +``` +grouping(expression) +``` + +#### Arguments + +- **expression**: Expression to evaluate whether data is aggregated across the specified column. Can be a constant, column, or function. + +#### Example + +```sql +> SELECT column_name, GROUPING(column_name) AS group_column + FROM table_name + GROUP BY GROUPING SETS ((column_name), ()); ++-------------+-------------+ +| column_name | group_column | ++-------------+-------------+ +| value1 | 0 | +| value2 | 0 | +| NULL | 1 | ++-------------+-------------+ +``` + +### `last_value` + +Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group. + +``` +first_value(expression [ORDER BY expression]) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT first_value(column_name ORDER BY other_column) FROM table_name; ++-----------------------------------------------+ +| first_value(column_name ORDER BY other_column)| ++-----------------------------------------------+ +| first_element | ++-----------------------------------------------+ +``` + +### `max` + +Returns the maximum value in the specified column. + +``` +max(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT max(column_name) FROM table_name; ++----------------------+ +| max(column_name) | ++----------------------+ +| 150 | ++----------------------+ +``` + +### `mean` + +_Alias of [avg](#avg)._ + +### `median` + +Returns the median value in the specified column. + +``` +median(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT median(column_name) FROM table_name; ++----------------------+ +| median(column_name) | ++----------------------+ +| 45.5 | ++----------------------+ +``` + +### `min` + +Returns the maximum value in the specified column. + +``` +max(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT max(column_name) FROM table_name; ++----------------------+ +| max(column_name) | ++----------------------+ +| 150 | ++----------------------+ +``` + +### `string_agg` + +Concatenates the values of string expressions and places separator values between them. + +``` +string_agg(expression, delimiter) +``` + +#### Arguments + +- **expression**: The string expression to concatenate. Can be a column or any valid string expression. +- **delimiter**: A literal string used as a separator between the concatenated values. + +#### Example + +```sql +> SELECT string_agg(name, ', ') AS names_list + FROM employee; ++--------------------------+ +| names_list | ++--------------------------+ +| Alice, Bob, Charlie | ++--------------------------+ +``` + +### `sum` + +Returns the sum of all values in the specified column. + +``` +sum(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT sum(column_name) FROM table_name; ++-----------------------+ +| sum(column_name) | ++-----------------------+ +| 12345 | ++-----------------------+ +``` + +### `var` + +Returns the statistical sample variance of a set of numbers. + +``` +var(expression) +``` + +#### Arguments + +- **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Aliases + +- var_sample +- var_samp + +### `var_pop` + +Returns the statistical population variance of a set of numbers. + +``` +var_pop(expression) +``` + +#### Arguments + +- **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Aliases + +- var_population + +### `var_population` + +_Alias of [var_pop](#var_pop)._ + +### `var_samp` + +_Alias of [var](#var)._ + +### `var_sample` + +_Alias of [var](#var)._ + +## Statistical Functions + +- [corr](#corr) +- [covar](#covar) +- [covar_pop](#covar_pop) +- [covar_samp](#covar_samp) +- [nth_value](#nth_value) +- [regr_avgx](#regr_avgx) +- [regr_avgy](#regr_avgy) +- [regr_count](#regr_count) +- [regr_intercept](#regr_intercept) +- [regr_r2](#regr_r2) +- [regr_slope](#regr_slope) +- [regr_sxx](#regr_sxx) +- [regr_sxy](#regr_sxy) +- [regr_syy](#regr_syy) +- [stddev](#stddev) +- [stddev_pop](#stddev_pop) +- [stddev_samp](#stddev_samp) + +### `corr` + +Returns the coefficient of correlation between two numeric values. + +``` +corr(expression1, expression2) +``` + +#### Arguments + +- **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression2**: Second expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT corr(column1, column2) FROM table_name; ++--------------------------------+ +| corr(column1, column2) | ++--------------------------------+ +| 0.85 | ++--------------------------------+ +``` + +### `covar` + +_Alias of [covar_samp](#covar_samp)._ + +### `covar_pop` + +Returns the sample covariance of a set of number pairs. + +``` +covar_samp(expression1, expression2) +``` + +#### Arguments + +- **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression2**: Second expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT covar_samp(column1, column2) FROM table_name; ++-----------------------------------+ +| covar_samp(column1, column2) | ++-----------------------------------+ +| 8.25 | ++-----------------------------------+ +``` + +### `covar_samp` + +Returns the sample covariance of a set of number pairs. + +``` +covar_samp(expression1, expression2) +``` + +#### Arguments + +- **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression2**: Second expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT covar_samp(column1, column2) FROM table_name; ++-----------------------------------+ +| covar_samp(column1, column2) | ++-----------------------------------+ +| 8.25 | ++-----------------------------------+ +``` + +#### Aliases + +- covar + +### `nth_value` + +Returns the nth value in a group of values. + +``` +nth_value(expression, n ORDER BY expression) +``` + +#### Arguments + +- **expression**: The column or expression to retrieve the nth value from. +- **n**: The position (nth) of the value to retrieve, based on the ordering. + +#### Example + +```sql +> SELECT dept_id, salary, NTH_VALUE(salary, 2) OVER (PARTITION BY dept_id ORDER BY salary ASC) AS second_salary_by_dept + FROM employee; ++---------+--------+-------------------------+ +| dept_id | salary | second_salary_by_dept | ++---------+--------+-------------------------+ +| 1 | 30000 | NULL | +| 1 | 40000 | 40000 | +| 1 | 50000 | 40000 | +| 2 | 35000 | NULL | +| 2 | 45000 | 45000 | ++---------+--------+-------------------------+ +``` + +### `regr_avgx` + +Computes the average of the independent variable (input) expression_x for the non-null paired data points. + +``` +regr_avgx(expression_y, expression_x) +``` + +#### Arguments + +- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `regr_avgy` + +Computes the average of the dependent variable (output) expression_y for the non-null paired data points. + +``` +regr_avgy(expression_y, expression_x) +``` + +#### Arguments + +- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `regr_count` + +Counts the number of non-null paired data points. + +``` +regr_count(expression_y, expression_x) +``` + +#### Arguments + +- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `regr_intercept` + +Computes the y-intercept of the linear regression line. For the equation (y = kx + b), this function returns b. + +``` +regr_intercept(expression_y, expression_x) +``` + +#### Arguments + +- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `regr_r2` + +Computes the square of the correlation coefficient between the independent and dependent variables. + +``` +regr_r2(expression_y, expression_x) +``` + +#### Arguments + +- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `regr_slope` + +Returns the slope of the linear regression line for non-null pairs in aggregate columns. Given input column Y and X: regr_slope(Y, X) returns the slope (k in Y = k\*X + b) using minimal RSS fitting. + +``` +regr_slope(expression_y, expression_x) +``` + +#### Arguments + +- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `regr_sxx` + +Computes the sum of squares of the independent variable. + +``` +regr_sxx(expression_y, expression_x) +``` + +#### Arguments + +- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `regr_sxy` + +Computes the sum of products of paired data points. + +``` +regr_sxy(expression_y, expression_x) +``` + +#### Arguments + +- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `regr_syy` + +Computes the sum of squares of the dependent variable. + +``` +regr_syy(expression_y, expression_x) +``` + +#### Arguments + +- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `stddev` + +Returns the standard deviation of a set of numbers. + +``` +stddev(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT stddev(column_name) FROM table_name; ++----------------------+ +| stddev(column_name) | ++----------------------+ +| 12.34 | ++----------------------+ +``` + +#### Aliases + +- stddev_samp + +### `stddev_pop` + +Returns the standard deviation of a set of numbers. + +``` +stddev(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT stddev(column_name) FROM table_name; ++----------------------+ +| stddev(column_name) | ++----------------------+ +| 12.34 | ++----------------------+ +``` + +### `stddev_samp` + +_Alias of [stddev](#stddev)._ + +## Approximate Functions + +- [approx_distinct](#approx_distinct) +- [approx_median](#approx_median) +- [approx_percentile_cont](#approx_percentile_cont) +- [approx_percentile_cont_with_weight](#approx_percentile_cont_with_weight) + +### `approx_distinct` + +Returns the approximate number of distinct input values calculated using the HyperLogLog algorithm. + +``` +approx_distinct(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT approx_distinct(column_name) FROM table_name; ++-----------------------------------+ +| approx_distinct(column_name) | ++-----------------------------------+ +| 42 | ++-----------------------------------+ +``` + +### `approx_median` + +Returns the approximate median (50th percentile) of input values. It is an alias of `approx_percentile_cont(x, 0.5)`. + +``` +approx_median(expression) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> SELECT approx_median(column_name) FROM table_name; ++-----------------------------------+ +| approx_median(column_name) | ++-----------------------------------+ +| 23.5 | ++-----------------------------------+ +``` + +### `approx_percentile_cont` + +Returns the approximate percentile of input values using the t-digest algorithm. + +``` +approx_percentile_cont(expression, percentile, centroids) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **percentile**: Percentile to compute. Must be a float value between 0 and 1 (inclusive). +- **centroids**: Number of centroids to use in the t-digest algorithm. _Default is 100_. A higher number results in more accurate approximation but requires more memory. + +#### Example + +```sql +> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name; ++-------------------------------------------------+ +| approx_percentile_cont(column_name, 0.75, 100) | ++-------------------------------------------------+ +| 65.0 | ++-------------------------------------------------+ +``` + +### `approx_percentile_cont_with_weight` + +Returns the weighted approximate percentile of input values using the t-digest algorithm. + +``` +approx_percentile_cont_with_weight(expression, weight, percentile) +``` + +#### Arguments + +- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **weight**: Expression to use as weight. Can be a constant, column, or function, and any combination of arithmetic operators. +- **percentile**: Percentile to compute. Must be a float value between 0 and 1 (inclusive). + +#### Example + +```sql +> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name; ++----------------------------------------------------------------------+ +| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) | ++----------------------------------------------------------------------+ +| 78.5 | ++----------------------------------------------------------------------+ +``` diff --git a/docs/source/user-guide/sql/aggregate_functions_new.md b/docs/source/user-guide/sql/aggregate_functions_new.md deleted file mode 100644 index ad6d15b94ee5..000000000000 --- a/docs/source/user-guide/sql/aggregate_functions_new.md +++ /dev/null @@ -1,865 +0,0 @@ - - - - -# Aggregate Functions (NEW) - -Note: this documentation is in the process of being migrated to be [automatically created from the codebase]. -Please see the [Aggregate Functions (old)](aggregate_functions.md) page for -the rest of the documentation. - -[automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740 - -Aggregate functions operate on a set of values to compute a single result. - -## General Functions - -- [array_agg](#array_agg) -- [avg](#avg) -- [bit_and](#bit_and) -- [bit_or](#bit_or) -- [bit_xor](#bit_xor) -- [bool_and](#bool_and) -- [bool_or](#bool_or) -- [count](#count) -- [first_value](#first_value) -- [grouping](#grouping) -- [last_value](#last_value) -- [max](#max) -- [mean](#mean) -- [median](#median) -- [min](#min) -- [string_agg](#string_agg) -- [sum](#sum) -- [var](#var) -- [var_pop](#var_pop) -- [var_population](#var_population) -- [var_samp](#var_samp) -- [var_sample](#var_sample) - -### `array_agg` - -Returns an array created from the expression elements. If ordering is required, elements are inserted in the specified order. - -``` -array_agg(expression [ORDER BY expression]) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT array_agg(column_name ORDER BY other_column) FROM table_name; -+-----------------------------------------------+ -| array_agg(column_name ORDER BY other_column) | -+-----------------------------------------------+ -| [element1, element2, element3] | -+-----------------------------------------------+ -``` - -### `avg` - -Returns the average of numeric values in the specified column. - -``` -avg(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT avg(column_name) FROM table_name; -+---------------------------+ -| avg(column_name) | -+---------------------------+ -| 42.75 | -+---------------------------+ -``` - -#### Aliases - -- mean - -### `bit_and` - -Computes the bitwise AND of all non-null input values. - -``` -bit_and(expression) -``` - -#### Arguments - -- **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `bit_or` - -Computes the bitwise OR of all non-null input values. - -``` -bit_or(expression) -``` - -#### Arguments - -- **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `bit_xor` - -Computes the bitwise exclusive OR of all non-null input values. - -``` -bit_xor(expression) -``` - -#### Arguments - -- **expression**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `bool_and` - -Returns true if all non-null input values are true, otherwise false. - -``` -bool_and(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT bool_and(column_name) FROM table_name; -+----------------------------+ -| bool_and(column_name) | -+----------------------------+ -| true | -+----------------------------+ -``` - -### `bool_or` - -Returns true if all non-null input values are true, otherwise false. - -``` -bool_and(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT bool_and(column_name) FROM table_name; -+----------------------------+ -| bool_and(column_name) | -+----------------------------+ -| true | -+----------------------------+ -``` - -### `count` - -Returns the number of non-null values in the specified column. To include null values in the total count, use `count(*)`. - -``` -count(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT count(column_name) FROM table_name; -+-----------------------+ -| count(column_name) | -+-----------------------+ -| 100 | -+-----------------------+ - -> SELECT count(*) FROM table_name; -+------------------+ -| count(*) | -+------------------+ -| 120 | -+------------------+ -``` - -### `first_value` - -Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group. - -``` -first_value(expression [ORDER BY expression]) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT first_value(column_name ORDER BY other_column) FROM table_name; -+-----------------------------------------------+ -| first_value(column_name ORDER BY other_column)| -+-----------------------------------------------+ -| first_element | -+-----------------------------------------------+ -``` - -### `grouping` - -Returns 1 if the data is aggregated across the specified column, or 0 if it is not aggregated in the result set. - -``` -grouping(expression) -``` - -#### Arguments - -- **expression**: Expression to evaluate whether data is aggregated across the specified column. Can be a constant, column, or function. - -#### Example - -```sql -> SELECT column_name, GROUPING(column_name) AS group_column - FROM table_name - GROUP BY GROUPING SETS ((column_name), ()); -+-------------+-------------+ -| column_name | group_column | -+-------------+-------------+ -| value1 | 0 | -| value2 | 0 | -| NULL | 1 | -+-------------+-------------+ -``` - -### `last_value` - -Returns the first element in an aggregation group according to the requested ordering. If no ordering is given, returns an arbitrary element from the group. - -``` -first_value(expression [ORDER BY expression]) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT first_value(column_name ORDER BY other_column) FROM table_name; -+-----------------------------------------------+ -| first_value(column_name ORDER BY other_column)| -+-----------------------------------------------+ -| first_element | -+-----------------------------------------------+ -``` - -### `max` - -Returns the maximum value in the specified column. - -``` -max(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT max(column_name) FROM table_name; -+----------------------+ -| max(column_name) | -+----------------------+ -| 150 | -+----------------------+ -``` - -### `mean` - -_Alias of [avg](#avg)._ - -### `median` - -Returns the median value in the specified column. - -``` -median(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT median(column_name) FROM table_name; -+----------------------+ -| median(column_name) | -+----------------------+ -| 45.5 | -+----------------------+ -``` - -### `min` - -Returns the maximum value in the specified column. - -``` -max(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT max(column_name) FROM table_name; -+----------------------+ -| max(column_name) | -+----------------------+ -| 150 | -+----------------------+ -``` - -### `string_agg` - -Concatenates the values of string expressions and places separator values between them. - -``` -string_agg(expression, delimiter) -``` - -#### Arguments - -- **expression**: The string expression to concatenate. Can be a column or any valid string expression. -- **delimiter**: A literal string used as a separator between the concatenated values. - -#### Example - -```sql -> SELECT string_agg(name, ', ') AS names_list - FROM employee; -+--------------------------+ -| names_list | -+--------------------------+ -| Alice, Bob, Charlie | -+--------------------------+ -``` - -### `sum` - -Returns the sum of all values in the specified column. - -``` -sum(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT sum(column_name) FROM table_name; -+-----------------------+ -| sum(column_name) | -+-----------------------+ -| 12345 | -+-----------------------+ -``` - -### `var` - -Returns the statistical sample variance of a set of numbers. - -``` -var(expression) -``` - -#### Arguments - -- **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Aliases - -- var_sample -- var_samp - -### `var_pop` - -Returns the statistical population variance of a set of numbers. - -``` -var_pop(expression) -``` - -#### Arguments - -- **expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Aliases - -- var_population - -### `var_population` - -_Alias of [var_pop](#var_pop)._ - -### `var_samp` - -_Alias of [var](#var)._ - -### `var_sample` - -_Alias of [var](#var)._ - -## Statistical Functions - -- [corr](#corr) -- [covar](#covar) -- [covar_pop](#covar_pop) -- [covar_samp](#covar_samp) -- [nth_value](#nth_value) -- [regr_avgx](#regr_avgx) -- [regr_avgy](#regr_avgy) -- [regr_count](#regr_count) -- [regr_intercept](#regr_intercept) -- [regr_r2](#regr_r2) -- [regr_slope](#regr_slope) -- [regr_sxx](#regr_sxx) -- [regr_sxy](#regr_sxy) -- [regr_syy](#regr_syy) -- [stddev](#stddev) -- [stddev_pop](#stddev_pop) -- [stddev_samp](#stddev_samp) - -### `corr` - -Returns the coefficient of correlation between two numeric values. - -``` -corr(expression1, expression2) -``` - -#### Arguments - -- **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression2**: Second expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT corr(column1, column2) FROM table_name; -+--------------------------------+ -| corr(column1, column2) | -+--------------------------------+ -| 0.85 | -+--------------------------------+ -``` - -### `covar` - -_Alias of [covar_samp](#covar_samp)._ - -### `covar_pop` - -Returns the sample covariance of a set of number pairs. - -``` -covar_samp(expression1, expression2) -``` - -#### Arguments - -- **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression2**: Second expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT covar_samp(column1, column2) FROM table_name; -+-----------------------------------+ -| covar_samp(column1, column2) | -+-----------------------------------+ -| 8.25 | -+-----------------------------------+ -``` - -### `covar_samp` - -Returns the sample covariance of a set of number pairs. - -``` -covar_samp(expression1, expression2) -``` - -#### Arguments - -- **expression1**: First expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression2**: Second expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT covar_samp(column1, column2) FROM table_name; -+-----------------------------------+ -| covar_samp(column1, column2) | -+-----------------------------------+ -| 8.25 | -+-----------------------------------+ -``` - -#### Aliases - -- covar - -### `nth_value` - -Returns the nth value in a group of values. - -``` -nth_value(expression, n ORDER BY expression) -``` - -#### Arguments - -- **expression**: The column or expression to retrieve the nth value from. -- **n**: The position (nth) of the value to retrieve, based on the ordering. - -#### Example - -```sql -> SELECT dept_id, salary, NTH_VALUE(salary, 2) OVER (PARTITION BY dept_id ORDER BY salary ASC) AS second_salary_by_dept - FROM employee; -+---------+--------+-------------------------+ -| dept_id | salary | second_salary_by_dept | -+---------+--------+-------------------------+ -| 1 | 30000 | NULL | -| 1 | 40000 | 40000 | -| 1 | 50000 | 40000 | -| 2 | 35000 | NULL | -| 2 | 45000 | 45000 | -+---------+--------+-------------------------+ -``` - -### `regr_avgx` - -Computes the average of the independent variable (input) expression_x for the non-null paired data points. - -``` -regr_avgx(expression_y, expression_x) -``` - -#### Arguments - -- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `regr_avgy` - -Computes the average of the dependent variable (output) expression_y for the non-null paired data points. - -``` -regr_avgy(expression_y, expression_x) -``` - -#### Arguments - -- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `regr_count` - -Counts the number of non-null paired data points. - -``` -regr_count(expression_y, expression_x) -``` - -#### Arguments - -- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `regr_intercept` - -Computes the y-intercept of the linear regression line. For the equation (y = kx + b), this function returns b. - -``` -regr_intercept(expression_y, expression_x) -``` - -#### Arguments - -- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `regr_r2` - -Computes the square of the correlation coefficient between the independent and dependent variables. - -``` -regr_r2(expression_y, expression_x) -``` - -#### Arguments - -- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `regr_slope` - -Returns the slope of the linear regression line for non-null pairs in aggregate columns. Given input column Y and X: regr_slope(Y, X) returns the slope (k in Y = k\*X + b) using minimal RSS fitting. - -``` -regr_slope(expression_y, expression_x) -``` - -#### Arguments - -- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `regr_sxx` - -Computes the sum of squares of the independent variable. - -``` -regr_sxx(expression_y, expression_x) -``` - -#### Arguments - -- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `regr_sxy` - -Computes the sum of products of paired data points. - -``` -regr_sxy(expression_y, expression_x) -``` - -#### Arguments - -- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `regr_syy` - -Computes the sum of squares of the dependent variable. - -``` -regr_syy(expression_y, expression_x) -``` - -#### Arguments - -- **expression_y**: Dependent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_x**: Independent variable expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `stddev` - -Returns the standard deviation of a set of numbers. - -``` -stddev(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT stddev(column_name) FROM table_name; -+----------------------+ -| stddev(column_name) | -+----------------------+ -| 12.34 | -+----------------------+ -``` - -#### Aliases - -- stddev_samp - -### `stddev_pop` - -Returns the standard deviation of a set of numbers. - -``` -stddev(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT stddev(column_name) FROM table_name; -+----------------------+ -| stddev(column_name) | -+----------------------+ -| 12.34 | -+----------------------+ -``` - -### `stddev_samp` - -_Alias of [stddev](#stddev)._ - -## Approximate Functions - -- [approx_distinct](#approx_distinct) -- [approx_median](#approx_median) -- [approx_percentile_cont](#approx_percentile_cont) -- [approx_percentile_cont_with_weight](#approx_percentile_cont_with_weight) - -### `approx_distinct` - -Returns the approximate number of distinct input values calculated using the HyperLogLog algorithm. - -``` -approx_distinct(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT approx_distinct(column_name) FROM table_name; -+-----------------------------------+ -| approx_distinct(column_name) | -+-----------------------------------+ -| 42 | -+-----------------------------------+ -``` - -### `approx_median` - -Returns the approximate median (50th percentile) of input values. It is an alias of `approx_percentile_cont(x, 0.5)`. - -``` -approx_median(expression) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> SELECT approx_median(column_name) FROM table_name; -+-----------------------------------+ -| approx_median(column_name) | -+-----------------------------------+ -| 23.5 | -+-----------------------------------+ -``` - -### `approx_percentile_cont` - -Returns the approximate percentile of input values using the t-digest algorithm. - -``` -approx_percentile_cont(expression, percentile, centroids) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **percentile**: Percentile to compute. Must be a float value between 0 and 1 (inclusive). -- **centroids**: Number of centroids to use in the t-digest algorithm. _Default is 100_. A higher number results in more accurate approximation but requires more memory. - -#### Example - -```sql -> SELECT approx_percentile_cont(column_name, 0.75, 100) FROM table_name; -+-------------------------------------------------+ -| approx_percentile_cont(column_name, 0.75, 100) | -+-------------------------------------------------+ -| 65.0 | -+-------------------------------------------------+ -``` - -### `approx_percentile_cont_with_weight` - -Returns the weighted approximate percentile of input values using the t-digest algorithm. - -``` -approx_percentile_cont_with_weight(expression, weight, percentile) -``` - -#### Arguments - -- **expression**: The expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **weight**: Expression to use as weight. Can be a constant, column, or function, and any combination of arithmetic operators. -- **percentile**: Percentile to compute. Must be a float value between 0 and 1 (inclusive). - -#### Example - -```sql -> SELECT approx_percentile_cont_with_weight(column_name, weight_column, 0.90) FROM table_name; -+----------------------------------------------------------------------+ -| approx_percentile_cont_with_weight(column_name, weight_column, 0.90) | -+----------------------------------------------------------------------+ -| 78.5 | -+----------------------------------------------------------------------+ -``` diff --git a/docs/source/user-guide/sql/index.rst b/docs/source/user-guide/sql/index.rst index 8b8afc7b048a..4499aac53611 100644 --- a/docs/source/user-guide/sql/index.rst +++ b/docs/source/user-guide/sql/index.rst @@ -30,11 +30,9 @@ SQL Reference information_schema operators aggregate_functions - aggregate_functions_new window_functions window_functions_new scalar_functions - scalar_functions_new special_functions sql_status write_options diff --git a/docs/source/user-guide/sql/scalar_functions.md b/docs/source/user-guide/sql/scalar_functions.md index a8e25930bef7..98c44cbd981d 100644 --- a/docs/source/user-guide/sql/scalar_functions.md +++ b/docs/source/user-guide/sql/scalar_functions.md @@ -17,111 +17,4343 @@ under the License. --> + + # Scalar Functions -Scalar functions operate on a single row at a time and return a single value. +## Math Functions + +- [abs](#abs) +- [acos](#acos) +- [acosh](#acosh) +- [asin](#asin) +- [asinh](#asinh) +- [atan](#atan) +- [atan2](#atan2) +- [atanh](#atanh) +- [cbrt](#cbrt) +- [ceil](#ceil) +- [cos](#cos) +- [cosh](#cosh) +- [cot](#cot) +- [degrees](#degrees) +- [exp](#exp) +- [factorial](#factorial) +- [floor](#floor) +- [gcd](#gcd) +- [isnan](#isnan) +- [iszero](#iszero) +- [lcm](#lcm) +- [ln](#ln) +- [log](#log) +- [log10](#log10) +- [log2](#log2) +- [nanvl](#nanvl) +- [pi](#pi) +- [pow](#pow) +- [power](#power) +- [radians](#radians) +- [random](#random) +- [round](#round) +- [signum](#signum) +- [sin](#sin) +- [sinh](#sinh) +- [sqrt](#sqrt) +- [tan](#tan) +- [tanh](#tanh) +- [trunc](#trunc) + +### `abs` + +Returns the absolute value of a number. + +``` +abs(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `acos` + +Returns the arc cosine or inverse cosine of a number. + +``` +acos(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `acosh` + +Returns the area hyperbolic cosine or inverse hyperbolic cosine of a number. + +``` +acosh(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `asin` + +Returns the arc sine or inverse sine of a number. + +``` +asin(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `asinh` + +Returns the area hyperbolic sine or inverse hyperbolic sine of a number. + +``` +asinh(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `atan` + +Returns the arc tangent or inverse tangent of a number. + +``` +atan(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `atan2` + +Returns the arc tangent or inverse tangent of `expression_y / expression_x`. + +``` +atan2(expression_y, expression_x) +``` + +#### Arguments + +- **expression_y**: First numeric expression to operate on. + Can be a constant, column, or function, and any combination of arithmetic operators. +- **expression_x**: Second numeric expression to operate on. + Can be a constant, column, or function, and any combination of arithmetic operators. + +### `atanh` + +Returns the area hyperbolic tangent or inverse hyperbolic tangent of a number. + +``` +atanh(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `cbrt` + +Returns the cube root of a number. + +``` +cbrt(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `ceil` + +Returns the nearest integer greater than or equal to a number. + +``` +ceil(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `cos` + +Returns the cosine of a number. + +``` +cos(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `cosh` + +Returns the hyperbolic cosine of a number. + +``` +cosh(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `cot` + +Returns the cotangent of a number. + +``` +cot(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `degrees` + +Converts radians to degrees. + +``` +degrees(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `exp` + +Returns the base-e exponential of a number. + +``` +exp(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `factorial` + +Factorial. Returns 1 if value is less than 2. + +``` +factorial(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `floor` + +Returns the nearest integer less than or equal to a number. + +``` +floor(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `gcd` + +Returns the greatest common divisor of `expression_x` and `expression_y`. Returns 0 if both inputs are zero. + +``` +gcd(expression_x, expression_y) +``` + +#### Arguments + +- **expression_x**: First numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_y**: Second numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `isnan` + +Returns true if a given number is +NaN or -NaN otherwise returns false. + +``` +isnan(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `iszero` + +Returns true if a given number is +0.0 or -0.0 otherwise returns false. + +``` +iszero(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `lcm` + +Returns the least common multiple of `expression_x` and `expression_y`. Returns 0 if either input is zero. + +``` +lcm(expression_x, expression_y) +``` + +#### Arguments + +- **expression_x**: First numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **expression_y**: Second numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `ln` + +Returns the natural logarithm of a number. + +``` +ln(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `log` + +Returns the base-x logarithm of a number. Can either provide a specified base, or if omitted then takes the base-10 of a number. + +``` +log(base, numeric_expression) +log(numeric_expression) +``` + +#### Arguments + +- **base**: Base numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `log10` + +Returns the base-10 logarithm of a number. + +``` +log10(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `log2` + +Returns the base-2 logarithm of a number. + +``` +log2(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `nanvl` + +Returns the first argument if it's not _NaN_. +Returns the second argument otherwise. + +``` +nanvl(expression_x, expression_y) +``` + +#### Arguments + +- **expression_x**: Numeric expression to return if it's not _NaN_. Can be a constant, column, or function, and any combination of arithmetic operators. +- **expression_y**: Numeric expression to return if the first expression is _NaN_. Can be a constant, column, or function, and any combination of arithmetic operators. + +### `pi` + +Returns an approximate value of π. + +``` +pi() +``` + +### `pow` + +_Alias of [power](#power)._ + +### `power` + +Returns a base expression raised to the power of an exponent. + +``` +power(base, exponent) +``` + +#### Arguments + +- **base**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **exponent**: Exponent numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Aliases + +- pow + +### `radians` + +Converts degrees to radians. + +``` +radians(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `random` + +Returns a random float value in the range [0, 1). +The random seed is unique to each row. + +``` +random() +``` + +### `round` + +Rounds a number to the nearest integer. + +``` +round(numeric_expression[, decimal_places]) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **decimal_places**: Optional. The number of decimal places to round to. Defaults to 0. + +### `signum` + +Returns the sign of a number. +Negative numbers return `-1`. +Zero and positive numbers return `1`. + +``` +signum(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `sin` + +Returns the sine of a number. + +``` +sin(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `sinh` + +Returns the hyperbolic sine of a number. + +``` +sinh(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `sqrt` + +Returns the square root of a number. + +``` +sqrt(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `tan` + +Returns the tangent of a number. + +``` +tan(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `tanh` + +Returns the hyperbolic tangent of a number. + +``` +tanh(numeric_expression) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. + +### `trunc` + +Truncates a number to a whole number or truncated to the specified decimal places. + +``` +trunc(numeric_expression[, decimal_places]) +``` + +#### Arguments + +- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **decimal_places**: Optional. The number of decimal places to + truncate to. Defaults to 0 (truncate to a whole number). If + `decimal_places` is a positive integer, truncates digits to the + right of the decimal point. If `decimal_places` is a negative + integer, replaces digits to the left of the decimal point with `0`. + +## Conditional Functions + +- [coalesce](#coalesce) +- [ifnull](#ifnull) +- [nullif](#nullif) +- [nvl](#nvl) +- [nvl2](#nvl2) + +### `coalesce` + +Returns the first of its arguments that is not _null_. Returns _null_ if all arguments are _null_. This function is often used to substitute a default value for _null_ values. + +``` +coalesce(expression1[, ..., expression_n]) +``` + +#### Arguments + +- **expression1, expression_n**: Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary. + +#### Example + +```sql +> select coalesce(null, null, 'datafusion'); ++----------------------------------------+ +| coalesce(NULL,NULL,Utf8("datafusion")) | ++----------------------------------------+ +| datafusion | ++----------------------------------------+ +``` + +### `ifnull` + +_Alias of [nvl](#nvl)._ + +### `nullif` + +Returns _null_ if _expression1_ equals _expression2_; otherwise it returns _expression1_. +This can be used to perform the inverse operation of [`coalesce`](#coalesce). + +``` +nullif(expression1, expression2) +``` + +#### Arguments + +- **expression1**: Expression to compare and return if equal to expression2. Can be a constant, column, or function, and any combination of operators. +- **expression2**: Expression to compare to expression1. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select nullif('datafusion', 'data'); ++-----------------------------------------+ +| nullif(Utf8("datafusion"),Utf8("data")) | ++-----------------------------------------+ +| datafusion | ++-----------------------------------------+ +> select nullif('datafusion', 'datafusion'); ++-----------------------------------------------+ +| nullif(Utf8("datafusion"),Utf8("datafusion")) | ++-----------------------------------------------+ +| | ++-----------------------------------------------+ +``` + +### `nvl` + +Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_. + +``` +nvl(expression1, expression2) +``` + +#### Arguments + +- **expression1**: Expression to return if not null. Can be a constant, column, or function, and any combination of operators. +- **expression2**: Expression to return if expr1 is null. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select nvl(null, 'a'); ++---------------------+ +| nvl(NULL,Utf8("a")) | ++---------------------+ +| a | ++---------------------+\ +> select nvl('b', 'a'); ++--------------------------+ +| nvl(Utf8("b"),Utf8("a")) | ++--------------------------+ +| b | ++--------------------------+ +``` + +#### Aliases + +- ifnull + +### `nvl2` + +Returns _expression2_ if _expression1_ is not NULL; otherwise it returns _expression3_. + +``` +nvl2(expression1, expression2, expression3) +``` + +#### Arguments + +- **expression1**: Expression to test for null. Can be a constant, column, or function, and any combination of operators. +- **expression2**: Expression to return if expr1 is not null. Can be a constant, column, or function, and any combination of operators. +- **expression3**: Expression to return if expr1 is null. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select nvl2(null, 'a', 'b'); ++--------------------------------+ +| nvl2(NULL,Utf8("a"),Utf8("b")) | ++--------------------------------+ +| b | ++--------------------------------+ +> select nvl2('data', 'a', 'b'); ++----------------------------------------+ +| nvl2(Utf8("data"),Utf8("a"),Utf8("b")) | ++----------------------------------------+ +| a | ++----------------------------------------+ +``` + +## String Functions + +- [ascii](#ascii) +- [bit_length](#bit_length) +- [btrim](#btrim) +- [char_length](#char_length) +- [character_length](#character_length) +- [chr](#chr) +- [concat](#concat) +- [concat_ws](#concat_ws) +- [contains](#contains) +- [ends_with](#ends_with) +- [find_in_set](#find_in_set) +- [initcap](#initcap) +- [instr](#instr) +- [left](#left) +- [length](#length) +- [levenshtein](#levenshtein) +- [lower](#lower) +- [lpad](#lpad) +- [ltrim](#ltrim) +- [octet_length](#octet_length) +- [position](#position) +- [repeat](#repeat) +- [replace](#replace) +- [reverse](#reverse) +- [right](#right) +- [rpad](#rpad) +- [rtrim](#rtrim) +- [split_part](#split_part) +- [starts_with](#starts_with) +- [strpos](#strpos) +- [substr](#substr) +- [substr_index](#substr_index) +- [substring](#substring) +- [substring_index](#substring_index) +- [to_hex](#to_hex) +- [translate](#translate) +- [trim](#trim) +- [upper](#upper) +- [uuid](#uuid) + +### `ascii` + +Returns the Unicode character code of the first character in a string. + +``` +ascii(str) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select ascii('abc'); ++--------------------+ +| ascii(Utf8("abc")) | ++--------------------+ +| 97 | ++--------------------+ +> select ascii('🚀'); ++-------------------+ +| ascii(Utf8("🚀")) | ++-------------------+ +| 128640 | ++-------------------+ +``` + +**Related functions**: + +- [chr](#chr) + +### `bit_length` + +Returns the bit length of a string. + +``` +bit_length(str) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select bit_length('datafusion'); ++--------------------------------+ +| bit_length(Utf8("datafusion")) | ++--------------------------------+ +| 80 | ++--------------------------------+ +``` + +**Related functions**: + +- [length](#length) +- [octet_length](#octet_length) + +### `btrim` + +Trims the specified trim string from the start and end of a string. If no trim string is provided, all whitespace is removed from the start and end of the input string. + +``` +btrim(str[, trim_str]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **trim_str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. _Default is whitespace characters._ + +#### Example + +```sql +> select btrim('__datafusion____', '_'); ++-------------------------------------------+ +| btrim(Utf8("__datafusion____"),Utf8("_")) | ++-------------------------------------------+ +| datafusion | ++-------------------------------------------+ +``` + +#### Alternative Syntax + +```sql +trim(BOTH trim_str FROM str) +``` + +```sql +trim(trim_str FROM str) +``` + +#### Aliases + +- trim + +**Related functions**: + +- [ltrim](#ltrim) +- [rtrim](#rtrim) + +### `char_length` + +_Alias of [character_length](#character_length)._ + +### `character_length` + +Returns the number of characters in a string. + +``` +character_length(str) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select character_length('Ångström'); ++------------------------------------+ +| character_length(Utf8("Ångström")) | ++------------------------------------+ +| 8 | ++------------------------------------+ +``` + +#### Aliases + +- length +- char_length + +**Related functions**: + +- [bit_length](#bit_length) +- [octet_length](#octet_length) + +### `chr` + +Returns the character with the specified ASCII or Unicode code value. + +``` +chr(expression) +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select chr(128640); ++--------------------+ +| chr(Int64(128640)) | ++--------------------+ +| 🚀 | ++--------------------+ +``` + +**Related functions**: + +- [ascii](#ascii) + +### `concat` + +Concatenates multiple strings together. + +``` +concat(str[, ..., str_n]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **str_n**: Subsequent string expressions to concatenate. + +#### Example + +```sql +> select concat('data', 'f', 'us', 'ion'); ++-------------------------------------------------------+ +| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) | ++-------------------------------------------------------+ +| datafusion | ++-------------------------------------------------------+ +``` + +**Related functions**: + +- [concat_ws](#concat_ws) + +### `concat_ws` + +Concatenates multiple strings together with a specified separator. + +``` +concat_ws(separator, str[, ..., str_n]) +``` + +#### Arguments + +- **separator**: Separator to insert between concatenated strings. +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **str_n**: Subsequent string expressions to concatenate. + +#### Example + +```sql +> select concat_ws('_', 'data', 'fusion'); ++--------------------------------------------------+ +| concat_ws(Utf8("_"),Utf8("data"),Utf8("fusion")) | ++--------------------------------------------------+ +| data_fusion | ++--------------------------------------------------+ +``` + +**Related functions**: + +- [concat](#concat) + +### `contains` + +Return true if search_str is found within string (case-sensitive). + +``` +contains(str, search_str) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **search_str**: The string to search for in str. + +#### Example + +```sql +> select contains('the quick brown fox', 'row'); ++---------------------------------------------------+ +| contains(Utf8("the quick brown fox"),Utf8("row")) | ++---------------------------------------------------+ +| true | ++---------------------------------------------------+ +``` + +### `ends_with` + +Tests if a string ends with a substring. + +``` +ends_with(str, substr) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **substr**: Substring to test for. + +#### Example + +```sql +> select ends_with('datafusion', 'soin'); ++--------------------------------------------+ +| ends_with(Utf8("datafusion"),Utf8("soin")) | ++--------------------------------------------+ +| false | ++--------------------------------------------+ +> select ends_with('datafusion', 'sion'); ++--------------------------------------------+ +| ends_with(Utf8("datafusion"),Utf8("sion")) | ++--------------------------------------------+ +| true | ++--------------------------------------------+ +``` + +### `find_in_set` + +Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings. + +``` +find_in_set(str, strlist) +``` + +#### Arguments + +- **str**: String expression to find in strlist. +- **strlist**: A string list is a string composed of substrings separated by , characters. + +#### Example + +```sql +> select find_in_set('b', 'a,b,c,d'); ++----------------------------------------+ +| find_in_set(Utf8("b"),Utf8("a,b,c,d")) | ++----------------------------------------+ +| 2 | ++----------------------------------------+ +``` + +### `initcap` + +Capitalizes the first character in each word in the input string. Words are delimited by non-alphanumeric characters. + +``` +initcap(str) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select initcap('apache datafusion'); ++------------------------------------+ +| initcap(Utf8("apache datafusion")) | ++------------------------------------+ +| Apache Datafusion | ++------------------------------------+ +``` + +**Related functions**: + +- [lower](#lower) +- [upper](#upper) + +### `instr` + +_Alias of [strpos](#strpos)._ + +### `left` + +Returns a specified number of characters from the left side of a string. + +``` +left(str, n) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **n**: Number of characters to return. + +#### Example + +```sql +> select left('datafusion', 4); ++-----------------------------------+ +| left(Utf8("datafusion"),Int64(4)) | ++-----------------------------------+ +| data | ++-----------------------------------+ +``` + +**Related functions**: + +- [right](#right) + +### `length` + +_Alias of [character_length](#character_length)._ + +### `levenshtein` + +Returns the [`Levenshtein distance`](https://en.wikipedia.org/wiki/Levenshtein_distance) between the two given strings. + +``` +levenshtein(str1, str2) +``` + +#### Arguments + +- **str1**: String expression to compute Levenshtein distance with str2. +- **str2**: String expression to compute Levenshtein distance with str1. + +#### Example + +```sql +> select levenshtein('kitten', 'sitting'); ++---------------------------------------------+ +| levenshtein(Utf8("kitten"),Utf8("sitting")) | ++---------------------------------------------+ +| 3 | ++---------------------------------------------+ +``` + +### `lower` + +Converts a string to lower-case. + +``` +lower(str) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select lower('Ångström'); ++-------------------------+ +| lower(Utf8("Ångström")) | ++-------------------------+ +| ångström | ++-------------------------+ +``` + +**Related functions**: + +- [initcap](#initcap) +- [upper](#upper) + +### `lpad` + +Pads the left side of a string with another string to a specified string length. + +``` +lpad(str, n[, padding_str]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **n**: String length to pad to. +- **padding_str**: Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._ + +#### Example + +```sql +> select lpad('Dolly', 10, 'hello'); ++---------------------------------------------+ +| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) | ++---------------------------------------------+ +| helloDolly | ++---------------------------------------------+ +``` + +**Related functions**: + +- [rpad](#rpad) + +### `ltrim` + +Trims the specified trim string from the beginning of a string. If no trim string is provided, all whitespace is removed from the start of the input string. + +``` +ltrim(str[, trim_str]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **trim_str**: String expression to trim from the beginning of the input string. Can be a constant, column, or function, and any combination of arithmetic operators. _Default is whitespace characters._ + +#### Example + +```sql +> select ltrim(' datafusion '); ++-------------------------------+ +| ltrim(Utf8(" datafusion ")) | ++-------------------------------+ +| datafusion | ++-------------------------------+ +> select ltrim('___datafusion___', '_'); ++-------------------------------------------+ +| ltrim(Utf8("___datafusion___"),Utf8("_")) | ++-------------------------------------------+ +| datafusion___ | ++-------------------------------------------+ +``` + +#### Alternative Syntax + +```sql +trim(LEADING trim_str FROM str) +``` + +**Related functions**: + +- [btrim](#btrim) +- [rtrim](#rtrim) + +### `octet_length` + +Returns the length of a string in bytes. + +``` +octet_length(str) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select octet_length('Ångström'); ++--------------------------------+ +| octet_length(Utf8("Ångström")) | ++--------------------------------+ +| 10 | ++--------------------------------+ +``` + +**Related functions**: + +- [bit_length](#bit_length) +- [length](#length) + +### `position` + +_Alias of [strpos](#strpos)._ + +### `repeat` + +Returns a string with an input string repeated a specified number. + +``` +repeat(str, n) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **n**: Number of times to repeat the input string. + +#### Example + +```sql +> select repeat('data', 3); ++-------------------------------+ +| repeat(Utf8("data"),Int64(3)) | ++-------------------------------+ +| datadatadata | ++-------------------------------+ +``` + +### `replace` + +Replaces all occurrences of a specified substring in a string with a new substring. + +``` +replace(str, substr, replacement) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **substr**: Substring expression to replace in the input string. Substring expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **replacement**: Replacement substring expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select replace('ABabbaBA', 'ab', 'cd'); ++-------------------------------------------------+ +| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) | ++-------------------------------------------------+ +| ABcdbaBA | ++-------------------------------------------------+ +``` + +### `reverse` + +Reverses the character order of a string. + +``` +reverse(str) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select reverse('datafusion'); ++-----------------------------+ +| reverse(Utf8("datafusion")) | ++-----------------------------+ +| noisufatad | ++-----------------------------+ +``` + +### `right` + +Returns a specified number of characters from the right side of a string. + +``` +right(str, n) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **n**: Number of characters to return + +#### Example + +```sql +> select right('datafusion', 6); ++------------------------------------+ +| right(Utf8("datafusion"),Int64(6)) | ++------------------------------------+ +| fusion | ++------------------------------------+ +``` + +**Related functions**: + +- [left](#left) + +### `rpad` + +Pads the right side of a string with another string to a specified string length. + +``` +rpad(str, n[, padding_str]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **n**: String length to pad to. +- **padding_str**: String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._ + +#### Example + +```sql +> select rpad('datafusion', 20, '_-'); ++-----------------------------------------------+ +| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) | ++-----------------------------------------------+ +| datafusion_-_-_-_-_- | ++-----------------------------------------------+ +``` + +**Related functions**: + +- [lpad](#lpad) + +### `rtrim` + +Trims the specified trim string from the end of a string. If no trim string is provided, all whitespace is removed from the end of the input string. + +``` +rtrim(str[, trim_str]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **trim_str**: String expression to trim from the end of the input string. Can be a constant, column, or function, and any combination of arithmetic operators. _Default is whitespace characters._ + +#### Example + +```sql +> select rtrim(' datafusion '); ++-------------------------------+ +| rtrim(Utf8(" datafusion ")) | ++-------------------------------+ +| datafusion | ++-------------------------------+ +> select rtrim('___datafusion___', '_'); ++-------------------------------------------+ +| rtrim(Utf8("___datafusion___"),Utf8("_")) | ++-------------------------------------------+ +| ___datafusion | ++-------------------------------------------+ +``` + +#### Alternative Syntax + +```sql +trim(TRAILING trim_str FROM str) +``` + +**Related functions**: + +- [btrim](#btrim) +- [ltrim](#ltrim) + +### `split_part` + +Splits a string based on a specified delimiter and returns the substring in the specified position. + +``` +split_part(str, delimiter, pos) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **delimiter**: String or character to split on. +- **pos**: Position of the part to return. + +#### Example + +```sql +> select split_part('1.2.3.4.5', '.', 3); ++--------------------------------------------------+ +| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) | ++--------------------------------------------------+ +| 3 | ++--------------------------------------------------+ +``` + +### `starts_with` + +Tests if a string starts with a substring. + +``` +starts_with(str, substr) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **substr**: Substring to test for. + +#### Example + +```sql +> select starts_with('datafusion','data'); ++----------------------------------------------+ +| starts_with(Utf8("datafusion"),Utf8("data")) | ++----------------------------------------------+ +| true | ++----------------------------------------------+ +``` + +### `strpos` + +Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0. + +``` +strpos(str, substr) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **substr**: Substring expression to search for. + +#### Example + +```sql +> select strpos('datafusion', 'fus'); ++----------------------------------------+ +| strpos(Utf8("datafusion"),Utf8("fus")) | ++----------------------------------------+ +| 5 | ++----------------------------------------+ +``` + +#### Alternative Syntax + +```sql +position(substr in origstr) +``` + +#### Aliases + +- instr +- position + +### `substr` + +Extracts a substring of a specified number of characters from a specific starting position in a string. + +``` +substr(str, start_pos[, length]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **start_pos**: Character position to start the substring at. The first character in the string has a position of 1. +- **length**: Number of characters to extract. If not specified, returns the rest of the string after the start position. + +#### Example + +```sql +> select substr('datafusion', 5, 3); ++----------------------------------------------+ +| substr(Utf8("datafusion"),Int64(5),Int64(3)) | ++----------------------------------------------+ +| fus | ++----------------------------------------------+ +``` + +#### Alternative Syntax + +```sql +substring(str from start_pos for length) +``` + +#### Aliases + +- substring + +### `substr_index` + +Returns the substring from str before count occurrences of the delimiter delim. +If count is positive, everything to the left of the final delimiter (counting from the left) is returned. +If count is negative, everything to the right of the final delimiter (counting from the right) is returned. + +``` +substr_index(str, delim, count) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **delim**: The string to find in str to split str. +- **count**: The number of times to search for the delimiter. Can be either a positive or negative number. + +#### Example + +```sql +> select substr_index('www.apache.org', '.', 1); ++---------------------------------------------------------+ +| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(1)) | ++---------------------------------------------------------+ +| www | ++---------------------------------------------------------+ +> select substr_index('www.apache.org', '.', -1); ++----------------------------------------------------------+ +| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(-1)) | ++----------------------------------------------------------+ +| org | ++----------------------------------------------------------+ +``` + +#### Aliases + +- substring_index + +### `substring` + +_Alias of [substr](#substr)._ + +### `substring_index` + +_Alias of [substr_index](#substr_index)._ + +### `to_hex` + +Converts an integer to a hexadecimal string. + +``` +to_hex(int) +``` + +#### Arguments + +- **int**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select to_hex(12345689); ++-------------------------+ +| to_hex(Int64(12345689)) | ++-------------------------+ +| bc6159 | ++-------------------------+ +``` + +### `translate` + +Translates characters in a string to specified translation characters. + +``` +translate(str, chars, translation) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **chars**: Characters to translate. +- **translation**: Translation characters. Translation characters replace only characters at the same position in the **chars** string. + +#### Example + +```sql +> select translate('twice', 'wic', 'her'); ++--------------------------------------------------+ +| translate(Utf8("twice"),Utf8("wic"),Utf8("her")) | ++--------------------------------------------------+ +| there | ++--------------------------------------------------+ +``` + +### `trim` + +_Alias of [btrim](#btrim)._ + +### `upper` + +Converts a string to upper-case. + +``` +upper(str) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select upper('dataFusion'); ++---------------------------+ +| upper(Utf8("dataFusion")) | ++---------------------------+ +| DATAFUSION | ++---------------------------+ +``` + +**Related functions**: + +- [initcap](#initcap) +- [lower](#lower) + +### `uuid` + +Returns [`UUID v4`]() string value which is unique per row. + +``` +uuid() +``` + +#### Example + +```sql +> select uuid(); ++--------------------------------------+ +| uuid() | ++--------------------------------------+ +| 6ec17ef8-1934-41cc-8d59-d0c8f9eea1f0 | ++--------------------------------------+ +``` + +## Binary String Functions + +- [decode](#decode) +- [encode](#encode) + +### `decode` + +Decode binary data from textual representation in string. + +``` +decode(expression, format) +``` + +#### Arguments + +- **expression**: Expression containing encoded string data +- **format**: Same arguments as [encode](#encode) + +**Related functions**: + +- [encode](#encode) + +### `encode` + +Encode binary data into a textual representation. + +``` +encode(expression, format) +``` + +#### Arguments + +- **expression**: Expression containing string or binary data +- **format**: Supported formats are: `base64`, `hex` + +**Related functions**: + +- [decode](#decode) + +## Regular Expression Functions + +Apache DataFusion uses a [PCRE-like](https://en.wikibooks.org/wiki/Regular_Expressions/Perl-Compatible_Regular_Expressions) +regular expression [syntax](https://docs.rs/regex/latest/regex/#syntax) +(minus support for several features including look-around and backreferences). +The following regular expression functions are supported: + +- [regexp_count](#regexp_count) +- [regexp_like](#regexp_like) +- [regexp_match](#regexp_match) +- [regexp_replace](#regexp_replace) + +### `regexp_count` + +Returns the number of matches that a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has in a string. + +``` +regexp_count(str, regexp[, start, flags]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **regexp**: Regular expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **start**: - **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function. +- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*? + +#### Example + +```sql +> select regexp_count('abcAbAbc', 'abc', 2, 'i'); ++---------------------------------------------------------------+ +| regexp_count(Utf8("abcAbAbc"),Utf8("abc"),Int64(2),Utf8("i")) | ++---------------------------------------------------------------+ +| 1 | ++---------------------------------------------------------------+ +``` + +### `regexp_like` + +Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise. + +``` +regexp_like(str, regexp[, flags]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **regexp**: Regular expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*? + +#### Example + +```sql +select regexp_like('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); ++--------------------------------------------------------+ +| regexp_like(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | ++--------------------------------------------------------+ +| true | ++--------------------------------------------------------+ +SELECT regexp_like('aBc', '(b|d)', 'i'); ++--------------------------------------------------+ +| regexp_like(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) | ++--------------------------------------------------+ +| true | ++--------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) + +### `regexp_match` + +Returns the first [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string. + +``` +regexp_match(str, regexp[, flags]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **regexp**: Regular expression to match against. + Can be a constant, column, or function. +- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: + - **i**: case-insensitive: letters match both upper and lower case + - **m**: multi-line mode: ^ and $ match begin/end of line + - **s**: allow . to match \n + - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used + - **U**: swap the meaning of x* and x*? + +#### Example + +```sql + > select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); + +---------------------------------------------------------+ + | regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | + +---------------------------------------------------------+ + | [Köln] | + +---------------------------------------------------------+ + SELECT regexp_match('aBc', '(b|d)', 'i'); + +---------------------------------------------------+ + | regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) | + +---------------------------------------------------+ + | [B] | + +---------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) + +### `regexp_replace` + +Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax). + +``` +regexp_replace(str, regexp, replacement[, flags]) +``` + +#### Arguments + +- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **regexp**: Regular expression to match against. + Can be a constant, column, or function. +- **replacement**: Replacement string expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: +- **g**: (global) Search globally and don't return after the first match +- **i**: case-insensitive: letters match both upper and lower case +- **m**: multi-line mode: ^ and $ match begin/end of line +- **s**: allow . to match \n +- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used +- **U**: swap the meaning of x* and x*? + +#### Example + +```sql +> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g'); ++------------------------------------------------------------------------+ +| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) | ++------------------------------------------------------------------------+ +| fooXarYXazY | ++------------------------------------------------------------------------+ +SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i'); ++-------------------------------------------------------------------+ +| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) | ++-------------------------------------------------------------------+ +| aAbBac | ++-------------------------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) + +## Time and Date Functions + +- [current_date](#current_date) +- [current_time](#current_time) +- [current_timestamp](#current_timestamp) +- [date_bin](#date_bin) +- [date_format](#date_format) +- [date_part](#date_part) +- [date_trunc](#date_trunc) +- [datepart](#datepart) +- [datetrunc](#datetrunc) +- [from_unixtime](#from_unixtime) +- [make_date](#make_date) +- [now](#now) +- [to_char](#to_char) +- [to_date](#to_date) +- [to_local_time](#to_local_time) +- [to_timestamp](#to_timestamp) +- [to_timestamp_micros](#to_timestamp_micros) +- [to_timestamp_millis](#to_timestamp_millis) +- [to_timestamp_nanos](#to_timestamp_nanos) +- [to_timestamp_seconds](#to_timestamp_seconds) +- [to_unixtime](#to_unixtime) +- [today](#today) + +### `current_date` + +Returns the current UTC date. + +The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes. + +``` +current_date() +``` + +#### Aliases + +- today + +### `current_time` + +Returns the current UTC time. + +The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes. + +``` +current_time() +``` + +### `current_timestamp` + +_Alias of [now](#now)._ + +### `date_bin` + +Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window. + +For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`. + +``` +date_bin(interval, expression, origin-timestamp) +``` + +#### Arguments + +- **interval**: Bin interval. +- **expression**: Time expression to operate on. Can be a constant, column, or function. +- **origin-timestamp**: Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC). + +The following intervals are supported: + +- nanoseconds +- microseconds +- milliseconds +- seconds +- minutes +- hours +- days +- weeks +- months +- years +- century + +### `date_format` + +_Alias of [to_char](#to_char)._ + +### `date_part` + +Returns the specified part of the date as an integer. + +``` +date_part(part, expression) +``` + +#### Arguments + +- **part**: Part of the date to return. The following date parts are supported: + + - year + - quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in) + - month + - week (week of the year) + - day (day of the month) + - hour + - minute + - second + - millisecond + - microsecond + - nanosecond + - dow (day of the week) + - doy (day of the year) + - epoch (seconds since Unix epoch) + +- **expression**: Time expression to operate on. Can be a constant, column, or function. + +#### Alternative Syntax + +```sql +extract(field FROM source) +``` + +#### Aliases + +- datepart + +### `date_trunc` + +Truncates a timestamp value to a specified precision. + +``` +date_trunc(precision, expression) +``` + +#### Arguments + +- **precision**: Time precision to truncate to. The following precisions are supported: + + - year / YEAR + - quarter / QUARTER + - month / MONTH + - week / WEEK + - day / DAY + - hour / HOUR + - minute / MINUTE + - second / SECOND + +- **expression**: Time expression to operate on. Can be a constant, column, or function. + +#### Aliases + +- datetrunc + +### `datepart` + +_Alias of [date_part](#date_part)._ + +### `datetrunc` + +_Alias of [date_trunc](#date_trunc)._ + +### `from_unixtime` + +Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp. + +``` +from_unixtime(expression) +``` + +#### Arguments + +- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. + +### `make_date` + +Make a date from year/month/day component parts. + +``` +make_date(year, month, day) +``` + +#### Arguments + +- **year**: Year to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators. +- **month**: Month to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators. +- **day**: Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators. + +#### Example + +```sql +> select make_date(2023, 1, 31); ++-------------------------------------------+ +| make_date(Int64(2023),Int64(1),Int64(31)) | ++-------------------------------------------+ +| 2023-01-31 | ++-------------------------------------------+ +> select make_date('2023', '01', '31'); ++-----------------------------------------------+ +| make_date(Utf8("2023"),Utf8("01"),Utf8("31")) | ++-----------------------------------------------+ +| 2023-01-31 | ++-----------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs) + +### `now` + +Returns the current UTC timestamp. + +The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes. + +``` +now() +``` + +#### Aliases + +- current_timestamp + +### `to_char` + +Returns a string representation of a date, time, timestamp or duration based on a [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). Unlike the PostgreSQL equivalent of this function numerical formatting is not supported. + +``` +to_char(expression, format) +``` + +#### Arguments + +- **expression**: Expression to operate on. Can be a constant, column, or function that results in a date, time, timestamp or duration. +- **format**: A [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) string to use to convert the expression. +- **day**: Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators. + +#### Example + +```sql +> select to_char('2023-03-01'::date, '%d-%m-%Y'); ++----------------------------------------------+ +| to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) | ++----------------------------------------------+ +| 01-03-2023 | ++----------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs) + +#### Aliases + +- date_format + +### `to_date` + +Converts a value to a date (`YYYY-MM-DD`). +Supports strings, integer and double types as input. +Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. +Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`). +Returns the corresponding date. + +Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`. + +``` +to_date('2017-05-31', '%Y-%m-%d') +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order + they appear with the first successful one being returned. If none of the formats successfully parse the expression + an error will be returned. + +#### Example + +```sql +> select to_date('2023-01-31'); ++-----------------------------+ +| to_date(Utf8("2023-01-31")) | ++-----------------------------+ +| 2023-01-31 | ++-----------------------------+ +> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d'); ++---------------------------------------------------------------+ +| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) | ++---------------------------------------------------------------+ +| 2023-01-31 | ++---------------------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs) + +### `to_local_time` + +Converts a timestamp with a timezone to a timestamp without a timezone (with no offset or timezone information). This function handles daylight saving time changes. + +``` +to_local_time(expression) +``` + +#### Arguments + +- **expression**: Time expression to operate on. Can be a constant, column, or function. + +#### Example + +```sql +> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp); ++---------------------------------------------+ +| to_local_time(Utf8("2024-04-01T00:00:20Z")) | ++---------------------------------------------+ +| 2024-04-01T00:00:20 | ++---------------------------------------------+ + +> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels'); ++---------------------------------------------+ +| to_local_time(Utf8("2024-04-01T00:00:20Z")) | ++---------------------------------------------+ +| 2024-04-01T00:00:20 | ++---------------------------------------------+ + +> SELECT + time, + arrow_typeof(time) as type, + to_local_time(time) as to_local_time, + arrow_typeof(to_local_time(time)) as to_local_time_type +FROM ( + SELECT '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' AS time +); ++---------------------------+------------------------------------------------+---------------------+-----------------------------+ +| time | type | to_local_time | to_local_time_type | ++---------------------------+------------------------------------------------+---------------------+-----------------------------+ +| 2024-04-01T00:00:20+02:00 | Timestamp(Nanosecond, Some("Europe/Brussels")) | 2024-04-01T00:00:20 | Timestamp(Nanosecond, None) | ++---------------------------+------------------------------------------------+---------------------+-----------------------------+ + +# combine `to_local_time()` with `date_bin()` to bin on boundaries in the timezone rather +# than UTC boundaries + +> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AS date_bin; ++---------------------+ +| date_bin | ++---------------------+ +| 2024-04-01T00:00:00 | ++---------------------+ + +> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels' AS date_bin_with_timezone; ++---------------------------+ +| date_bin_with_timezone | ++---------------------------+ +| 2024-04-01T00:00:00+02:00 | ++---------------------------+ +``` + +### `to_timestamp` + +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. + +Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds. + +``` +to_timestamp(expression[, ..., format_n]) +``` + +#### Arguments + +- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. + +#### Example + +```sql +> select to_timestamp('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------+ +| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------+ +> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------+ +| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++--------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) + +### `to_timestamp_micros` + +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp. + +``` +to_timestamp_micros(expression[, ..., format_n]) +``` + +#### Arguments + +- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. + +#### Example + +```sql +> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456 | ++------------------------------------------------------------------+ +> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456 | ++---------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) + +### `to_timestamp_millis` + +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. + +``` +to_timestamp_millis(expression[, ..., format_n]) +``` + +#### Arguments + +- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. + +#### Example + +```sql +> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); ++------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++------------------------------------------------------------------+ +| 2023-01-31T14:26:56.123 | ++------------------------------------------------------------------+ +> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++---------------------------------------------------------------------------------------------------------------+ +| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++---------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123 | ++---------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) + +### `to_timestamp_nanos` + +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. + +``` +to_timestamp_nanos(expression[, ..., format_n]) +``` + +#### Arguments + +- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. + +#### Example + +```sql +> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); ++-----------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-----------------------------------------------------------------+ +| 2023-01-31T14:26:56.123456789 | ++-----------------------------------------------------------------+ +> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++--------------------------------------------------------------------------------------------------------------+ +| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++--------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00.123456789 | ++---------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) + +### `to_timestamp_seconds` + +Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. + +``` +to_timestamp_seconds(expression[, ..., format_n]) +``` + +#### Arguments + +- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. + +#### Example + +```sql +> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); ++-------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) | ++-------------------------------------------------------------------+ +| 2023-01-31T14:26:56 | ++-------------------------------------------------------------------+ +> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); ++----------------------------------------------------------------------------------------------------------------+ +| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | ++----------------------------------------------------------------------------------------------------------------+ +| 2023-05-17T03:59:00 | ++----------------------------------------------------------------------------------------------------------------+ +``` + +Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) + +### `to_unixtime` + +Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). Supports strings, dates, timestamps and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. + +``` +to_unixtime(expression[, ..., format_n]) +``` + +#### Arguments + +- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. +- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. + +#### Example + +```sql +> select to_unixtime('2020-09-08T12:00:00+00:00'); ++------------------------------------------------+ +| to_unixtime(Utf8("2020-09-08T12:00:00+00:00")) | ++------------------------------------------------+ +| 1599566400 | ++------------------------------------------------+ +> select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); ++-----------------------------------------------------------------------------------------------------------------------------+ +| to_unixtime(Utf8("01-14-2023 01:01:30+05:30"),Utf8("%q"),Utf8("%d-%m-%Y %H/%M/%S"),Utf8("%+"),Utf8("%m-%d-%Y %H:%M:%S%#z")) | ++-----------------------------------------------------------------------------------------------------------------------------+ +| 1673638290 | ++-----------------------------------------------------------------------------------------------------------------------------+ +``` + +### `today` + +_Alias of [current_date](#current_date)._ + +## Array Functions + +- [array_any_value](#array_any_value) +- [array_append](#array_append) +- [array_cat](#array_cat) +- [array_concat](#array_concat) +- [array_contains](#array_contains) +- [array_dims](#array_dims) +- [array_distance](#array_distance) +- [array_distinct](#array_distinct) +- [array_element](#array_element) +- [array_empty](#array_empty) +- [array_except](#array_except) +- [array_extract](#array_extract) +- [array_has](#array_has) +- [array_has_all](#array_has_all) +- [array_has_any](#array_has_any) +- [array_indexof](#array_indexof) +- [array_intersect](#array_intersect) +- [array_join](#array_join) +- [array_length](#array_length) +- [array_ndims](#array_ndims) +- [array_pop_back](#array_pop_back) +- [array_pop_front](#array_pop_front) +- [array_position](#array_position) +- [array_positions](#array_positions) +- [array_prepend](#array_prepend) +- [array_push_back](#array_push_back) +- [array_push_front](#array_push_front) +- [array_remove](#array_remove) +- [array_remove_all](#array_remove_all) +- [array_remove_n](#array_remove_n) +- [array_repeat](#array_repeat) +- [array_replace](#array_replace) +- [array_replace_all](#array_replace_all) +- [array_replace_n](#array_replace_n) +- [array_resize](#array_resize) +- [array_reverse](#array_reverse) +- [array_slice](#array_slice) +- [array_sort](#array_sort) +- [array_to_string](#array_to_string) +- [array_union](#array_union) +- [cardinality](#cardinality) +- [empty](#empty) +- [flatten](#flatten) +- [generate_series](#generate_series) +- [list_any_value](#list_any_value) +- [list_append](#list_append) +- [list_cat](#list_cat) +- [list_concat](#list_concat) +- [list_contains](#list_contains) +- [list_dims](#list_dims) +- [list_distance](#list_distance) +- [list_distinct](#list_distinct) +- [list_element](#list_element) +- [list_empty](#list_empty) +- [list_except](#list_except) +- [list_extract](#list_extract) +- [list_has](#list_has) +- [list_has_all](#list_has_all) +- [list_has_any](#list_has_any) +- [list_indexof](#list_indexof) +- [list_intersect](#list_intersect) +- [list_join](#list_join) +- [list_length](#list_length) +- [list_ndims](#list_ndims) +- [list_pop_back](#list_pop_back) +- [list_pop_front](#list_pop_front) +- [list_position](#list_position) +- [list_positions](#list_positions) +- [list_prepend](#list_prepend) +- [list_push_back](#list_push_back) +- [list_push_front](#list_push_front) +- [list_remove](#list_remove) +- [list_remove_all](#list_remove_all) +- [list_remove_n](#list_remove_n) +- [list_repeat](#list_repeat) +- [list_replace](#list_replace) +- [list_replace_all](#list_replace_all) +- [list_replace_n](#list_replace_n) +- [list_resize](#list_resize) +- [list_reverse](#list_reverse) +- [list_slice](#list_slice) +- [list_sort](#list_sort) +- [list_to_string](#list_to_string) +- [list_union](#list_union) +- [make_array](#make_array) +- [make_list](#make_list) +- [range](#range) +- [string_to_array](#string_to_array) +- [string_to_list](#string_to_list) + +### `array_any_value` + +Extracts the element with the index n from the array. + +``` +array_element(array, index) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **index**: Index to extract the element from the array. + +#### Example + +```sql +> select array_element([1, 2, 3, 4], 3); ++-----------------------------------------+ +| array_element(List([1,2,3,4]),Int64(3)) | ++-----------------------------------------+ +| 3 | ++-----------------------------------------+ +``` + +#### Aliases + +- list_any_value + +### `array_append` + +Appends an element to the end of an array. + +``` +array_append(array, element) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to append to the array. + +#### Example + +```sql +> select array_append([1, 2, 3], 4); ++--------------------------------------+ +| array_append(List([1,2,3]),Int64(4)) | ++--------------------------------------+ +| [1, 2, 3, 4] | ++--------------------------------------+ +``` + +#### Aliases + +- list_append +- array_push_back +- list_push_back + +### `array_cat` + +_Alias of [array_concat](#array_concat)._ + +### `array_concat` + +Appends an element to the end of an array. + +``` +array_append(array, element) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to append to the array. + +#### Example + +```sql +> select array_append([1, 2, 3], 4); ++--------------------------------------+ +| array_append(List([1,2,3]),Int64(4)) | ++--------------------------------------+ +| [1, 2, 3, 4] | ++--------------------------------------+ +``` + +#### Aliases + +- array_cat +- list_concat +- list_cat + +### `array_contains` + +_Alias of [array_has](#array_has)._ + +### `array_dims` + +Returns an array of the array's dimensions. + +``` +array_dims(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_dims([[1, 2, 3], [4, 5, 6]]); ++---------------------------------+ +| array_dims(List([1,2,3,4,5,6])) | ++---------------------------------+ +| [2, 3] | ++---------------------------------+ +``` + +#### Aliases + +- list_dims + +### `array_distance` + +Returns the Euclidean distance between two input arrays of equal length. + +``` +array_distance(array1, array2) +``` + +#### Arguments + +- **array1**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **array2**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_distance([1, 2], [1, 4]); ++------------------------------------+ +| array_distance(List([1,2], [1,4])) | ++------------------------------------+ +| 2.0 | ++------------------------------------+ +``` + +#### Aliases + +- list_distance + +### `array_distinct` + +Returns distinct values from the array after removing duplicates. + +``` +array_distinct(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_distinct([1, 3, 2, 3, 1, 2, 4]); ++---------------------------------+ +| array_distinct(List([1,2,3,4])) | ++---------------------------------+ +| [1, 2, 3, 4] | ++---------------------------------+ +``` + +#### Aliases + +- list_distinct + +### `array_element` + +Extracts the element with the index n from the array. + +``` +array_element(array, index) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **index**: Index to extract the element from the array. + +#### Example + +```sql +> select array_element([1, 2, 3, 4], 3); ++-----------------------------------------+ +| array_element(List([1,2,3,4]),Int64(3)) | ++-----------------------------------------+ +| 3 | ++-----------------------------------------+ +``` + +#### Aliases + +- array_extract +- list_element +- list_extract + +### `array_empty` + +_Alias of [empty](#empty)._ + +### `array_except` + +Returns an array of the elements that appear in the first array but not in the second. + +``` +array_except(array1, array2) +``` + +#### Arguments + +- **array1**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **array2**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_except([1, 2, 3, 4], [5, 6, 3, 4]); ++----------------------------------------------------+ +| array_except([1, 2, 3, 4], [5, 6, 3, 4]); | ++----------------------------------------------------+ +| [1, 2] | ++----------------------------------------------------+ +> select array_except([1, 2, 3, 4], [3, 4, 5, 6]); ++----------------------------------------------------+ +| array_except([1, 2, 3, 4], [3, 4, 5, 6]); | ++----------------------------------------------------+ +| [1, 2] | ++----------------------------------------------------+ +``` + +#### Aliases + +- list_except + +### `array_extract` + +_Alias of [array_element](#array_element)._ + +### `array_has` + +Returns true if the array contains the element. + +``` +array_has(array, element) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_has([1, 2, 3], 2); ++-----------------------------+ +| array_has(List([1,2,3]), 2) | ++-----------------------------+ +| true | ++-----------------------------+ +``` + +#### Aliases + +- list_has +- array_contains +- list_contains + +### `array_has_all` + +Returns true if the array contains the element. + +``` +array_has(array, element) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_has([1, 2, 3], 2); ++-----------------------------+ +| array_has(List([1,2,3]), 2) | ++-----------------------------+ +| true | ++-----------------------------+ +``` + +#### Aliases + +- list_has_all + +### `array_has_any` + +Returns true if the array contains the element. + +``` +array_has(array, element) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_has([1, 2, 3], 2); ++-----------------------------+ +| array_has(List([1,2,3]), 2) | ++-----------------------------+ +| true | ++-----------------------------+ +``` + +#### Aliases + +- list_has_any + +### `array_indexof` + +_Alias of [array_position](#array_position)._ + +### `array_intersect` + +Returns distinct values from the array after removing duplicates. + +``` +array_distinct(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_distinct([1, 3, 2, 3, 1, 2, 4]); ++---------------------------------+ +| array_distinct(List([1,2,3,4])) | ++---------------------------------+ +| [1, 2, 3, 4] | ++---------------------------------+ +``` + +#### Aliases + +- list_intersect + +### `array_join` + +_Alias of [array_to_string](#array_to_string)._ + +### `array_length` + +Returns the length of the array dimension. + +``` +array_length(array, dimension) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **dimension**: Array dimension. + +#### Example + +```sql +> select array_length([1, 2, 3, 4, 5], 1); ++-------------------------------------------+ +| array_length(List([1,2,3,4,5]), 1) | ++-------------------------------------------+ +| 5 | ++-------------------------------------------+ +``` + +#### Aliases + +- list_length + +### `array_ndims` + +Returns an array of the array's dimensions. + +``` +array_dims(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_dims([[1, 2, 3], [4, 5, 6]]); ++---------------------------------+ +| array_dims(List([1,2,3,4,5,6])) | ++---------------------------------+ +| [2, 3] | ++---------------------------------+ +``` + +#### Aliases + +- list_ndims + +### `array_pop_back` + +Extracts the element with the index n from the array. + +``` +array_element(array, index) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **index**: Index to extract the element from the array. + +#### Example + +```sql +> select array_element([1, 2, 3, 4], 3); ++-----------------------------------------+ +| array_element(List([1,2,3,4]),Int64(3)) | ++-----------------------------------------+ +| 3 | ++-----------------------------------------+ +``` + +#### Aliases + +- list_pop_back + +### `array_pop_front` + +Extracts the element with the index n from the array. + +``` +array_element(array, index) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **index**: Index to extract the element from the array. + +#### Example + +```sql +> select array_element([1, 2, 3, 4], 3); ++-----------------------------------------+ +| array_element(List([1,2,3,4]),Int64(3)) | ++-----------------------------------------+ +| 3 | ++-----------------------------------------+ +``` + +#### Aliases + +- list_pop_front + +### `array_position` + +Returns the position of the first occurrence of the specified element in the array. + +``` +array_position(array, element) +array_position(array, element, index) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to search for position in the array. +- **index**: Index at which to start searching. + +#### Example + +```sql +> select array_position([1, 2, 2, 3, 1, 4], 2); ++----------------------------------------------+ +| array_position(List([1,2,2,3,1,4]),Int64(2)) | ++----------------------------------------------+ +| 2 | ++----------------------------------------------+ +> select array_position([1, 2, 2, 3, 1, 4], 2, 3); ++----------------------------------------------------+ +| array_position(List([1,2,2,3,1,4]),Int64(2), Int64(3)) | ++----------------------------------------------------+ +| 3 | ++----------------------------------------------------+ +``` + +#### Aliases + +- list_position +- array_indexof +- list_indexof + +### `array_positions` + +Returns the position of the first occurrence of the specified element in the array. + +``` +array_position(array, element) +array_position(array, element, index) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to search for position in the array. +- **index**: Index at which to start searching. + +#### Example + +```sql +> select array_position([1, 2, 2, 3, 1, 4], 2); ++----------------------------------------------+ +| array_position(List([1,2,2,3,1,4]),Int64(2)) | ++----------------------------------------------+ +| 2 | ++----------------------------------------------+ +> select array_position([1, 2, 2, 3, 1, 4], 2, 3); ++----------------------------------------------------+ +| array_position(List([1,2,2,3,1,4]),Int64(2), Int64(3)) | ++----------------------------------------------------+ +| 3 | ++----------------------------------------------------+ +``` + +#### Aliases + +- list_positions + +### `array_prepend` + +Appends an element to the end of an array. + +``` +array_append(array, element) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to append to the array. + +#### Example + +```sql +> select array_append([1, 2, 3], 4); ++--------------------------------------+ +| array_append(List([1,2,3]),Int64(4)) | ++--------------------------------------+ +| [1, 2, 3, 4] | ++--------------------------------------+ +``` + +#### Aliases + +- list_prepend +- array_push_front +- list_push_front + +### `array_push_back` + +_Alias of [array_append](#array_append)._ + +### `array_push_front` + +_Alias of [array_prepend](#array_prepend)._ + +### `array_remove` + +Removes the first element from the array equal to the given value. + +``` +array_remove(array, element) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to be removed from the array. + +#### Example + +```sql +> select array_remove([1, 2, 2, 3, 2, 1, 4], 2); ++----------------------------------------------+ +| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) | ++----------------------------------------------+ +| [1, 2, 3, 2, 1, 4] | ++----------------------------------------------+ +``` + +#### Aliases + +- list_remove + +### `array_remove_all` + +Removes the first element from the array equal to the given value. + +``` +array_remove(array, element) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to be removed from the array. + +#### Example + +```sql +> select array_remove([1, 2, 2, 3, 2, 1, 4], 2); ++----------------------------------------------+ +| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) | ++----------------------------------------------+ +| [1, 2, 3, 2, 1, 4] | ++----------------------------------------------+ +``` + +#### Aliases + +- list_remove_all + +### `array_remove_n` + +Removes the first element from the array equal to the given value. + +``` +array_remove(array, element) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **element**: Element to be removed from the array. + +#### Example + +```sql +> select array_remove([1, 2, 2, 3, 2, 1, 4], 2); ++----------------------------------------------+ +| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) | ++----------------------------------------------+ +| [1, 2, 3, 2, 1, 4] | ++----------------------------------------------+ +``` + +#### Aliases + +- list_remove_n + +### `array_repeat` + +Returns an array containing element `count` times. + +``` +array_repeat(element, count) +``` + +#### Arguments + +- **element**: Element expression. Can be a constant, column, or function, and any combination of array operators. +- **count**: Value of how many times to repeat the element. + +#### Example + +```sql +> select array_repeat(1, 3); ++---------------------------------+ +| array_repeat(Int64(1),Int64(3)) | ++---------------------------------+ +| [1, 1, 1] | ++---------------------------------+ +> select array_repeat([1, 2], 2); ++------------------------------------+ +| array_repeat(List([1,2]),Int64(2)) | ++------------------------------------+ +| [[1, 2], [1, 2]] | ++------------------------------------+ +``` + +#### Aliases + +- list_repeat + +### `array_replace` + +Replaces the first `max` occurrences of the specified element with another specified element. + +``` +array_replace_n(array, from, to, max) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **from**: Initial element. +- **to**: Final element. +- **max**: Number of first occurrences to replace. + +#### Example + +```sql +> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2); ++-------------------------------------------------------------------+ +| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) | ++-------------------------------------------------------------------+ +| [1, 5, 5, 3, 2, 1, 4] | ++-------------------------------------------------------------------+ +``` + +#### Aliases + +- list_replace + +### `array_replace_all` + +Replaces the first `max` occurrences of the specified element with another specified element. + +``` +array_replace_n(array, from, to, max) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **from**: Initial element. +- **to**: Final element. +- **max**: Number of first occurrences to replace. + +#### Example + +```sql +> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2); ++-------------------------------------------------------------------+ +| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) | ++-------------------------------------------------------------------+ +| [1, 5, 5, 3, 2, 1, 4] | ++-------------------------------------------------------------------+ +``` + +#### Aliases + +- list_replace_all + +### `array_replace_n` + +Replaces the first `max` occurrences of the specified element with another specified element. + +``` +array_replace_n(array, from, to, max) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **from**: Initial element. +- **to**: Final element. +- **max**: Number of first occurrences to replace. + +#### Example + +```sql +> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2); ++-------------------------------------------------------------------+ +| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) | ++-------------------------------------------------------------------+ +| [1, 5, 5, 3, 2, 1, 4] | ++-------------------------------------------------------------------+ +``` + +#### Aliases + +- list_replace_n + +### `array_resize` + +Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. + +``` +array_resize(array, size, value) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **size**: New size of given array. +- **value**: Defines new elements' value or empty if value is not set. + +#### Example + +```sql +> select array_resize([1, 2, 3], 5, 0); ++-------------------------------------+ +| array_resize(List([1,2,3],5,0)) | ++-------------------------------------+ +| [1, 2, 3, 0, 0] | ++-------------------------------------+ +``` + +#### Aliases + +- list_resize + +### `array_reverse` + +Returns the array with the order of the elements reversed. + +``` +array_reverse(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_reverse([1, 2, 3, 4]); ++------------------------------------------------------------+ +| array_reverse(List([1, 2, 3, 4])) | ++------------------------------------------------------------+ +| [4, 3, 2, 1] | ++------------------------------------------------------------+ +``` + +#### Aliases + +- list_reverse + +### `array_slice` + +Extracts the element with the index n from the array. + +``` +array_element(array, index) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **index**: Index to extract the element from the array. + +#### Example + +```sql +> select array_element([1, 2, 3, 4], 3); ++-----------------------------------------+ +| array_element(List([1,2,3,4]),Int64(3)) | ++-----------------------------------------+ +| 3 | ++-----------------------------------------+ +``` + +#### Aliases + +- list_slice + +### `array_sort` + +Sort array. + +``` +array_sort(array, desc, nulls_first) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **desc**: Whether to sort in descending order(`ASC` or `DESC`). +- **nulls_first**: Whether to sort nulls first(`NULLS FIRST` or `NULLS LAST`). + +#### Example + +```sql +> select array_sort([3, 1, 2]); ++-----------------------------+ +| array_sort(List([3,1,2])) | ++-----------------------------+ +| [1, 2, 3] | ++-----------------------------+ +``` + +#### Aliases + +- list_sort + +### `array_to_string` + +Converts each element to its text representation. + +``` +array_to_string(array, delimiter) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **delimiter**: Array element separator. + +#### Example + +```sql +> select array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], ','); ++----------------------------------------------------+ +| array_to_string(List([1,2,3,4,5,6,7,8]),Utf8(",")) | ++----------------------------------------------------+ +| 1,2,3,4,5,6,7,8 | ++----------------------------------------------------+ +``` + +#### Aliases + +- list_to_string +- array_join +- list_join + +### `array_union` + +Returns distinct values from the array after removing duplicates. + +``` +array_distinct(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select array_distinct([1, 3, 2, 3, 1, 2, 4]); ++---------------------------------+ +| array_distinct(List([1,2,3,4])) | ++---------------------------------+ +| [1, 2, 3, 4] | ++---------------------------------+ +``` + +#### Aliases + +- list_union + +### `cardinality` + +Returns the total number of elements in the array. + +``` +cardinality(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]); ++--------------------------------------+ +| cardinality(List([1,2,3,4,5,6,7,8])) | ++--------------------------------------+ +| 8 | ++--------------------------------------+ +``` + +### `empty` + +Returns 1 for an empty array or 0 for a non-empty array. + +``` +empty(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select empty([1]); ++------------------+ +| empty(List([1])) | ++------------------+ +| 0 | ++------------------+ +``` + +#### Aliases + +- array_empty +- list_empty + +### `flatten` + +Converts an array of arrays to a flat array. + +- Applies to any depth of nested arrays +- Does not change arrays that are already flat + +The flattened array contains all the elements from all source arrays. + +``` +flatten(array) +``` + +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. + +#### Example + +```sql +> select flatten([[1, 2], [3, 4]]); ++------------------------------+ +| flatten(List([1,2], [3,4])) | ++------------------------------+ +| [1, 2, 3, 4] | ++------------------------------+ +``` + +### `generate_series` + +Similar to the range function, but it includes the upper bound. + +``` +generate_series(start, stop, step) +``` + +#### Arguments + +- **start**: start of the series. Ints, timestamps, dates or string types that can be coerced to Date32 are supported. +- **end**: end of the series (included). Type must be the same as start. +- **step**: increase by step (can not be 0). Steps less than a day are supported only for timestamp ranges. + +#### Example + +```sql +> select generate_series(1,3); ++------------------------------------+ +| generate_series(Int64(1),Int64(3)) | ++------------------------------------+ +| [1, 2, 3] | ++------------------------------------+ +``` + +### `list_any_value` + +_Alias of [array_any_value](#array_any_value)._ + +### `list_append` + +_Alias of [array_append](#array_append)._ + +### `list_cat` + +_Alias of [array_concat](#array_concat)._ + +### `list_concat` + +_Alias of [array_concat](#array_concat)._ + +### `list_contains` + +_Alias of [array_has](#array_has)._ + +### `list_dims` + +_Alias of [array_dims](#array_dims)._ + +### `list_distance` + +_Alias of [array_distance](#array_distance)._ + +### `list_distinct` + +_Alias of [array_distinct](#array_distinct)._ + +### `list_element` + +_Alias of [array_element](#array_element)._ + +### `list_empty` + +_Alias of [empty](#empty)._ + +### `list_except` + +_Alias of [array_except](#array_except)._ + +### `list_extract` + +_Alias of [array_element](#array_element)._ + +### `list_has` + +_Alias of [array_has](#array_has)._ + +### `list_has_all` + +_Alias of [array_has_all](#array_has_all)._ + +### `list_has_any` + +_Alias of [array_has_any](#array_has_any)._ + +### `list_indexof` + +_Alias of [array_position](#array_position)._ + +### `list_intersect` + +_Alias of [array_intersect](#array_intersect)._ + +### `list_join` + +_Alias of [array_to_string](#array_to_string)._ + +### `list_length` + +_Alias of [array_length](#array_length)._ + +### `list_ndims` + +_Alias of [array_ndims](#array_ndims)._ + +### `list_pop_back` + +_Alias of [array_pop_back](#array_pop_back)._ + +### `list_pop_front` + +_Alias of [array_pop_front](#array_pop_front)._ + +### `list_position` + +_Alias of [array_position](#array_position)._ -Note: this documentation is in the process of being migrated to be [automatically created from the codebase]. -Please see the [Scalar Functions (new)](scalar_functions_new.md) page for -the rest of the documentation. +### `list_positions` -[automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740 +_Alias of [array_positions](#array_positions)._ -## Conditional Functions +### `list_prepend` -See the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html) +_Alias of [array_prepend](#array_prepend)._ -## String Functions +### `list_push_back` -See the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html) +_Alias of [array_append](#array_append)._ -### `position` +### `list_push_front` + +_Alias of [array_prepend](#array_prepend)._ + +### `list_remove` + +_Alias of [array_remove](#array_remove)._ + +### `list_remove_all` + +_Alias of [array_remove_all](#array_remove_all)._ + +### `list_remove_n` + +_Alias of [array_remove_n](#array_remove_n)._ + +### `list_repeat` + +_Alias of [array_repeat](#array_repeat)._ + +### `list_replace` + +_Alias of [array_replace](#array_replace)._ + +### `list_replace_all` + +_Alias of [array_replace_all](#array_replace_all)._ -Returns the position of `substr` in `origstr` (counting from 1). If `substr` does -not appear in `origstr`, return 0. +### `list_replace_n` + +_Alias of [array_replace_n](#array_replace_n)._ + +### `list_resize` + +_Alias of [array_resize](#array_resize)._ + +### `list_reverse` + +_Alias of [array_reverse](#array_reverse)._ + +### `list_slice` + +_Alias of [array_slice](#array_slice)._ + +### `list_sort` + +_Alias of [array_sort](#array_sort)._ + +### `list_to_string` + +_Alias of [array_to_string](#array_to_string)._ + +### `list_union` + +_Alias of [array_union](#array_union)._ + +### `make_array` + +Returns an array using the specified input expressions. ``` -position(substr in origstr) +make_array(expression1[, ..., expression_n]) ``` #### Arguments -- **substr**: The pattern string. -- **origstr**: The model string. +- **expression_n**: Expression to include in the output array. Can be a constant, column, or function, and any combination of arithmetic or string operators. -## Time and Date Functions +#### Example + +```sql +> select make_array(1, 2, 3, 4, 5); ++----------------------------------------------------------+ +| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) | ++----------------------------------------------------------+ +| [1, 2, 3, 4, 5] | ++----------------------------------------------------------+ +``` + +#### Aliases + +- make_list + +### `make_list` -- [extract](#extract) +_Alias of [make_array](#make_array)._ -### `extract` +### `range` -Returns a sub-field from a time value as an integer. +Returns an Arrow array between start and stop with step. The range start..end contains all values with start <= x < end. It is empty if start >= end. Step cannot be 0. ``` -extract(field FROM source) +range(start, stop, step) ``` -Equivalent to calling `date_part('field', source)`. For example, these are equivalent: +#### Arguments + +- **start**: Start of the range. Ints, timestamps, dates or string types that can be coerced to Date32 are supported. +- **end**: End of the range (not included). Type must be the same as start. +- **step**: Increase by step (cannot be 0). Steps less than a day are supported only for timestamp ranges. + +#### Example ```sql -extract(day FROM '2024-04-13'::date) -date_part('day', '2024-04-13'::date) +> select range(2, 10, 3); ++-----------------------------------+ +| range(Int64(2),Int64(10),Int64(3))| ++-----------------------------------+ +| [2, 5, 8] | ++-----------------------------------+ + +> select range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH); ++--------------------------------------------------------------+ +| range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH) | ++--------------------------------------------------------------+ +| [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] | ++--------------------------------------------------------------+ ``` -See [date_part](#date_part). +### `string_to_array` -## Array Functions +Converts each element to its text representation. -- [range](#range) +``` +array_to_string(array, delimiter) +``` -### `range` +#### Arguments + +- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. +- **delimiter**: Array element separator. + +#### Example + +```sql +> select array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], ','); ++----------------------------------------------------+ +| array_to_string(List([1,2,3,4,5,6,7,8]),Utf8(",")) | ++----------------------------------------------------+ +| 1,2,3,4,5,6,7,8 | ++----------------------------------------------------+ +``` + +#### Aliases + +- string_to_list -Returns an Arrow array between start and stop with step. `SELECT range(2, 10, 3) -> [2, 5, 8]` or -`SELECT range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH);` +### `string_to_list` -The range start..end contains all values with start <= x < end. It is empty if start >= end. +_Alias of [string_to_array](#string_to_array)._ -Step can not be 0 (then the range will be nonsense.). +## Struct Functions -Note that when the required range is a number, it accepts (stop), (start, stop), and (start, stop, step) as parameters, -but when the required range is a date or timestamp, it must be 3 non-NULL parameters. -For example, +- [named_struct](#named_struct) +- [row](#row) +- [struct](#struct) + +### `named_struct` + +Returns an Arrow struct using the specified name and input expressions pairs. ``` -SELECT range(3); -SELECT range(1,5); -SELECT range(1,5,1); +named_struct(expression1_name, expression1_input[, ..., expression_n_name, expression_n_input]) +``` + +#### Arguments + +- **expression_n_name**: Name of the column field. Must be a constant string. +- **expression_n_input**: Expression to include in the output struct. Can be a constant, column, or function, and any combination of arithmetic or string operators. + +#### Example + +For example, this query converts two columns `a` and `b` to a single column with +a struct type of fields `field_a` and `field_b`: + +```sql +> select * from t; ++---+---+ +| a | b | ++---+---+ +| 1 | 2 | +| 3 | 4 | ++---+---+ +> select named_struct('field_a', a, 'field_b', b) from t; ++-------------------------------------------------------+ +| named_struct(Utf8("field_a"),t.a,Utf8("field_b"),t.b) | ++-------------------------------------------------------+ +| {field_a: 1, field_b: 2} | +| {field_a: 3, field_b: 4} | ++-------------------------------------------------------+ ``` -are allowed in number ranges +### `row` + +_Alias of [struct](#struct)._ + +### `struct` -but in date and timestamp ranges, only +Returns an Arrow struct using the specified input expressions optionally named. +Fields in the returned struct use the optional name or the `cN` naming convention. +For example: `c0`, `c1`, `c2`, etc. ``` -SELECT range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH); -SELECT range(TIMESTAMP '1992-09-01', TIMESTAMP '1993-03-01', INTERVAL '1' MONTH); +struct(expression1[, ..., expression_n]) +``` + +#### Arguments + +- **expression1, expression_n**: Expression to include in the output struct. Can be a constant, column, or function, any combination of arithmetic or string operators. + +#### Example + +For example, this query converts two columns `a` and `b` to a single column with +a struct type of fields `field_a` and `c1`: + +```sql +> select * from t; ++---+---+ +| a | b | ++---+---+ +| 1 | 2 | +| 3 | 4 | ++---+---+ + +-- use default names `c0`, `c1` +> select struct(a, b) from t; ++-----------------+ +| struct(t.a,t.b) | ++-----------------+ +| {c0: 1, c1: 2} | +| {c0: 3, c1: 4} | ++-----------------+ + +-- name the first field `field_a` +select struct(a as field_a, b) from t; ++--------------------------------------------------+ +| named_struct(Utf8("field_a"),t.a,Utf8("c1"),t.b) | ++--------------------------------------------------+ +| {field_a: 1, c1: 2} | +| {field_a: 3, c1: 4} | ++--------------------------------------------------+ ``` -is allowed, and +#### Aliases + +- row + +## Map Functions + +- [element_at](#element_at) +- [map](#map) +- [map_extract](#map_extract) +- [map_keys](#map_keys) +- [map_values](#map_values) + +### `element_at` + +_Alias of [map_extract](#map_extract)._ + +### `map` + +Returns an Arrow map with the specified key-value pairs. + +The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null. ``` -SELECT range(DATE '1992-09-01', DATE '1993-03-01', NULL); -SELECT range(NULL, DATE '1993-03-01', INTERVAL '1' MONTH); -SELECT range(DATE '1992-09-01', NULL, INTERVAL '1' MONTH); +map(key, value) +map(key: value) +make_map(['key1', 'key2'], ['value1', 'value2']) ``` -are not allowed +#### Arguments + +- **key**: For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators. + For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null. +- **value**: For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators. + For `make_map`: The list of values to be mapped to the corresponding keys. + +#### Example + +````sql + -- Using map function + SELECT MAP('type', 'test'); + ---- + {type: test} + + SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); + ---- + {POST: 41, HEAD: 33, PATCH: } + SELECT MAP([[1,2], [3,4]], ['a', 'b']); + ---- + {[1, 2]: a, [3, 4]: b} + + SELECT MAP { 'a': 1, 'b': 2 }; + ---- + {a: 1, b: 2} + + -- Using make_map function + SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]); + ---- + {POST: 41, HEAD: 33} + + SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]); + ---- + {key1: value1, key2: } + ``` + + +### `map_extract` + +Returns a list containing the value for the given key or an empty list if the key is not present in the map. + +```` + +map_extract(map, key) + +```` #### Arguments -- **start**: start of the range. Ints, timestamps, dates or string types that can be coerced to Date32 are supported. -- **end**: end of the range (not included). Type must be the same as start. -- **step**: increase by step (can not be 0). Steps less than a day are supported only for timestamp ranges. +- **map**: Map expression. Can be a constant, column, or function, and any combination of map operators. +- **key**: Key to extract from the map. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. + +#### Example + +```sql +SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); +---- +[1] + +SELECT map_extract(MAP {1: 'one', 2: 'two'}, 2); +---- +['two'] + +SELECT map_extract(MAP {'x': 10, 'y': NULL, 'z': 30}, 'y'); +---- +[] +```` #### Aliases -- generate_series +- element_at + +### `map_keys` + +Returns a list of all keys in the map. + +``` +map_keys(map) +``` + +#### Arguments + +- **map**: Map expression. Can be a constant, column, or function, and any combination of map operators. + +#### Example + +```sql +SELECT map_keys(MAP {'a': 1, 'b': NULL, 'c': 3}); +---- +[a, b, c] + +SELECT map_keys(map([100, 5], [42, 43])); +---- +[100, 5] +``` + +### `map_values` + +Returns a list of all values in the map. + +``` +map_values(map) +``` + +#### Arguments + +- **map**: Map expression. Can be a constant, column, or function, and any combination of map operators. + +#### Example + +```sql +SELECT map_values(MAP {'a': 1, 'b': NULL, 'c': 3}); +---- +[1, , 3] + +SELECT map_values(map([100, 5], [42, 43])); +---- +[42, 43] +``` + +## Hashing Functions + +- [digest](#digest) +- [md5](#md5) +- [sha224](#sha224) +- [sha256](#sha256) +- [sha384](#sha384) +- [sha512](#sha512) + +### `digest` + +Computes the binary hash of an expression using the specified algorithm. + +``` +digest(expression, algorithm) +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. +- **algorithm**: String expression specifying algorithm to use. Must be one of: +- md5 +- sha224 +- sha256 +- sha384 +- sha512 +- blake2s +- blake2b +- blake3 + +#### Example + +```sql +> select digest('foo', 'sha256'); ++------------------------------------------+ +| digest(Utf8("foo"), Utf8("sha256")) | ++------------------------------------------+ +| | ++------------------------------------------+ +``` + +### `md5` + +Computes an MD5 128-bit checksum for a string expression. + +``` +md5(expression) +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select md5('foo'); ++-------------------------------------+ +| md5(Utf8("foo")) | ++-------------------------------------+ +| | ++-------------------------------------+ +``` + +### `sha224` + +Computes the SHA-224 hash of a binary string. + +``` +sha224(expression) +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select sha224('foo'); ++------------------------------------------+ +| sha224(Utf8("foo")) | ++------------------------------------------+ +| | ++------------------------------------------+ +``` + +### `sha256` + +Computes the SHA-256 hash of a binary string. + +``` +sha256(expression) +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select sha256('foo'); ++--------------------------------------+ +| sha256(Utf8("foo")) | ++--------------------------------------+ +| | ++--------------------------------------+ +``` + +### `sha384` + +Computes the SHA-384 hash of a binary string. + +``` +sha384(expression) +``` + +#### Arguments + +- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select sha384('foo'); ++-----------------------------------------+ +| sha384(Utf8("foo")) | ++-----------------------------------------+ +| | ++-----------------------------------------+ +``` + +### `sha512` + +Computes the SHA-512 hash of a binary string. + +``` +sha512(expression) +``` + +#### Arguments + +- **expression**: String + +#### Example + +```sql +> select sha512('foo'); ++-------------------------------------------+ +| sha512(Utf8("foo")) | ++-------------------------------------------+ +| | ++-------------------------------------------+ +``` ## Other Functions -See the new documentation [`here`](https://datafusion.apache.org/user-guide/sql/scalar_functions_new.html) +- [arrow_cast](#arrow_cast) +- [arrow_typeof](#arrow_typeof) +- [get_field](#get_field) +- [version](#version) + +### `arrow_cast` + +Casts a value to a specific Arrow data type. + +``` +arrow_cast(expression, datatype) +``` + +#### Arguments + +- **expression**: Expression to cast. The expression can be a constant, column, or function, and any combination of operators. +- **datatype**: [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) name to cast to, as a string. The format is the same as that returned by [`arrow_typeof`] + +#### Example + +```sql +> select arrow_cast(-5, 'Int8') as a, + arrow_cast('foo', 'Dictionary(Int32, Utf8)') as b, + arrow_cast('bar', 'LargeUtf8') as c, + arrow_cast('2023-01-02T12:53:02', 'Timestamp(Microsecond, Some("+08:00"))') as d + ; ++----+-----+-----+---------------------------+ +| a | b | c | d | ++----+-----+-----+---------------------------+ +| -5 | foo | bar | 2023-01-02T12:53:02+08:00 | ++----+-----+-----+---------------------------+ +``` + +### `arrow_typeof` + +Returns the name of the underlying [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) of the expression. + +``` +arrow_typeof(expression) +``` + +#### Arguments + +- **expression**: Expression to evaluate. The expression can be a constant, column, or function, and any combination of operators. + +#### Example + +```sql +> select arrow_typeof('foo'), arrow_typeof(1); ++---------------------------+------------------------+ +| arrow_typeof(Utf8("foo")) | arrow_typeof(Int64(1)) | ++---------------------------+------------------------+ +| Utf8 | Int64 | ++---------------------------+------------------------+ +``` + +### `get_field` + +Returns a field within a map or a struct with the given key. +Note: most users invoke `get_field` indirectly via field access +syntax such as `my_struct_col['field_name']` which results in a call to +`get_field(my_struct_col, 'field_name')`. + +``` +get_field(expression1, expression2) +``` + +#### Arguments + +- **expression1**: The map or struct to retrieve a field for. +- **expression2**: The field name in the map or struct to retrieve data for. Must evaluate to a string. + +#### Example + +```sql +> create table t (idx varchar, v varchar) as values ('data','fusion'), ('apache', 'arrow'); +> select struct(idx, v) from t as c; ++-------------------------+ +| struct(c.idx,c.v) | ++-------------------------+ +| {c0: data, c1: fusion} | +| {c0: apache, c1: arrow} | ++-------------------------+ +> select get_field((select struct(idx, v) from t), 'c0'); ++-----------------------+ +| struct(t.idx,t.v)[c0] | ++-----------------------+ +| data | +| apache | ++-----------------------+ +> select get_field((select struct(idx, v) from t), 'c1'); ++-----------------------+ +| struct(t.idx,t.v)[c1] | ++-----------------------+ +| fusion | +| arrow | ++-----------------------+ +``` + +### `version` + +Returns the version of DataFusion. + +``` +version() +``` + +#### Example + +```sql +> select version(); ++--------------------------------------------+ +| version() | ++--------------------------------------------+ +| Apache DataFusion 42.0.0, aarch64 on macos | ++--------------------------------------------+ +``` diff --git a/docs/source/user-guide/sql/scalar_functions_new.md b/docs/source/user-guide/sql/scalar_functions_new.md deleted file mode 100644 index 56173b97b405..000000000000 --- a/docs/source/user-guide/sql/scalar_functions_new.md +++ /dev/null @@ -1,4365 +0,0 @@ - - - - -# Scalar Functions (NEW) - -Note: this documentation is in the process of being migrated to be [automatically created from the codebase]. -Please see the [Scalar Functions (old)](aggregate_functions.md) page for -the rest of the documentation. - -[automatically created from the codebase]: https://github.com/apache/datafusion/issues/12740 - -## Math Functions - -- [abs](#abs) -- [acos](#acos) -- [acosh](#acosh) -- [asin](#asin) -- [asinh](#asinh) -- [atan](#atan) -- [atan2](#atan2) -- [atanh](#atanh) -- [cbrt](#cbrt) -- [ceil](#ceil) -- [cos](#cos) -- [cosh](#cosh) -- [cot](#cot) -- [degrees](#degrees) -- [exp](#exp) -- [factorial](#factorial) -- [floor](#floor) -- [gcd](#gcd) -- [isnan](#isnan) -- [iszero](#iszero) -- [lcm](#lcm) -- [ln](#ln) -- [log](#log) -- [log10](#log10) -- [log2](#log2) -- [nanvl](#nanvl) -- [pi](#pi) -- [pow](#pow) -- [power](#power) -- [radians](#radians) -- [random](#random) -- [round](#round) -- [signum](#signum) -- [sin](#sin) -- [sinh](#sinh) -- [sqrt](#sqrt) -- [tan](#tan) -- [tanh](#tanh) -- [trunc](#trunc) - -### `abs` - -Returns the absolute value of a number. - -``` -abs(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `acos` - -Returns the arc cosine or inverse cosine of a number. - -``` -acos(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `acosh` - -Returns the area hyperbolic cosine or inverse hyperbolic cosine of a number. - -``` -acosh(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `asin` - -Returns the arc sine or inverse sine of a number. - -``` -asin(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `asinh` - -Returns the area hyperbolic sine or inverse hyperbolic sine of a number. - -``` -asinh(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `atan` - -Returns the arc tangent or inverse tangent of a number. - -``` -atan(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `atan2` - -Returns the arc tangent or inverse tangent of `expression_y / expression_x`. - -``` -atan2(expression_y, expression_x) -``` - -#### Arguments - -- **expression_y**: First numeric expression to operate on. - Can be a constant, column, or function, and any combination of arithmetic operators. -- **expression_x**: Second numeric expression to operate on. - Can be a constant, column, or function, and any combination of arithmetic operators. - -### `atanh` - -Returns the area hyperbolic tangent or inverse hyperbolic tangent of a number. - -``` -atanh(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `cbrt` - -Returns the cube root of a number. - -``` -cbrt(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `ceil` - -Returns the nearest integer greater than or equal to a number. - -``` -ceil(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `cos` - -Returns the cosine of a number. - -``` -cos(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `cosh` - -Returns the hyperbolic cosine of a number. - -``` -cosh(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `cot` - -Returns the cotangent of a number. - -``` -cot(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `degrees` - -Converts radians to degrees. - -``` -degrees(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `exp` - -Returns the base-e exponential of a number. - -``` -exp(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `factorial` - -Factorial. Returns 1 if value is less than 2. - -``` -factorial(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `floor` - -Returns the nearest integer less than or equal to a number. - -``` -floor(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `gcd` - -Returns the greatest common divisor of `expression_x` and `expression_y`. Returns 0 if both inputs are zero. - -``` -gcd(expression_x, expression_y) -``` - -#### Arguments - -- **expression_x**: First numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_y**: Second numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `isnan` - -Returns true if a given number is +NaN or -NaN otherwise returns false. - -``` -isnan(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `iszero` - -Returns true if a given number is +0.0 or -0.0 otherwise returns false. - -``` -iszero(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `lcm` - -Returns the least common multiple of `expression_x` and `expression_y`. Returns 0 if either input is zero. - -``` -lcm(expression_x, expression_y) -``` - -#### Arguments - -- **expression_x**: First numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **expression_y**: Second numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `ln` - -Returns the natural logarithm of a number. - -``` -ln(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `log` - -Returns the base-x logarithm of a number. Can either provide a specified base, or if omitted then takes the base-10 of a number. - -``` -log(base, numeric_expression) -log(numeric_expression) -``` - -#### Arguments - -- **base**: Base numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `log10` - -Returns the base-10 logarithm of a number. - -``` -log10(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `log2` - -Returns the base-2 logarithm of a number. - -``` -log2(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `nanvl` - -Returns the first argument if it's not _NaN_. -Returns the second argument otherwise. - -``` -nanvl(expression_x, expression_y) -``` - -#### Arguments - -- **expression_x**: Numeric expression to return if it's not _NaN_. Can be a constant, column, or function, and any combination of arithmetic operators. -- **expression_y**: Numeric expression to return if the first expression is _NaN_. Can be a constant, column, or function, and any combination of arithmetic operators. - -### `pi` - -Returns an approximate value of π. - -``` -pi() -``` - -### `pow` - -_Alias of [power](#power)._ - -### `power` - -Returns a base expression raised to the power of an exponent. - -``` -power(base, exponent) -``` - -#### Arguments - -- **base**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **exponent**: Exponent numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Aliases - -- pow - -### `radians` - -Converts degrees to radians. - -``` -radians(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `random` - -Returns a random float value in the range [0, 1). -The random seed is unique to each row. - -``` -random() -``` - -### `round` - -Rounds a number to the nearest integer. - -``` -round(numeric_expression[, decimal_places]) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **decimal_places**: Optional. The number of decimal places to round to. Defaults to 0. - -### `signum` - -Returns the sign of a number. -Negative numbers return `-1`. -Zero and positive numbers return `1`. - -``` -signum(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `sin` - -Returns the sine of a number. - -``` -sin(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `sinh` - -Returns the hyperbolic sine of a number. - -``` -sinh(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `sqrt` - -Returns the square root of a number. - -``` -sqrt(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `tan` - -Returns the tangent of a number. - -``` -tan(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `tanh` - -Returns the hyperbolic tangent of a number. - -``` -tanh(numeric_expression) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. - -### `trunc` - -Truncates a number to a whole number or truncated to the specified decimal places. - -``` -trunc(numeric_expression[, decimal_places]) -``` - -#### Arguments - -- **numeric_expression**: Numeric expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **decimal_places**: Optional. The number of decimal places to - truncate to. Defaults to 0 (truncate to a whole number). If - `decimal_places` is a positive integer, truncates digits to the - right of the decimal point. If `decimal_places` is a negative - integer, replaces digits to the left of the decimal point with `0`. - -## Conditional Functions - -- [coalesce](#coalesce) -- [ifnull](#ifnull) -- [nullif](#nullif) -- [nvl](#nvl) -- [nvl2](#nvl2) - -### `coalesce` - -Returns the first of its arguments that is not _null_. Returns _null_ if all arguments are _null_. This function is often used to substitute a default value for _null_ values. - -``` -coalesce(expression1[, ..., expression_n]) -``` - -#### Arguments - -- **expression1, expression_n**: Expression to use if previous expressions are _null_. Can be a constant, column, or function, and any combination of arithmetic operators. Pass as many expression arguments as necessary. - -#### Example - -```sql -> select coalesce(null, null, 'datafusion'); -+----------------------------------------+ -| coalesce(NULL,NULL,Utf8("datafusion")) | -+----------------------------------------+ -| datafusion | -+----------------------------------------+ -``` - -### `ifnull` - -_Alias of [nvl](#nvl)._ - -### `nullif` - -Returns _null_ if _expression1_ equals _expression2_; otherwise it returns _expression1_. -This can be used to perform the inverse operation of [`coalesce`](#coalesce). - -``` -nullif(expression1, expression2) -``` - -#### Arguments - -- **expression1**: Expression to compare and return if equal to expression2. Can be a constant, column, or function, and any combination of operators. -- **expression2**: Expression to compare to expression1. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select nullif('datafusion', 'data'); -+-----------------------------------------+ -| nullif(Utf8("datafusion"),Utf8("data")) | -+-----------------------------------------+ -| datafusion | -+-----------------------------------------+ -> select nullif('datafusion', 'datafusion'); -+-----------------------------------------------+ -| nullif(Utf8("datafusion"),Utf8("datafusion")) | -+-----------------------------------------------+ -| | -+-----------------------------------------------+ -``` - -### `nvl` - -Returns _expression2_ if _expression1_ is NULL otherwise it returns _expression1_. - -``` -nvl(expression1, expression2) -``` - -#### Arguments - -- **expression1**: Expression to return if not null. Can be a constant, column, or function, and any combination of operators. -- **expression2**: Expression to return if expr1 is null. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select nvl(null, 'a'); -+---------------------+ -| nvl(NULL,Utf8("a")) | -+---------------------+ -| a | -+---------------------+\ -> select nvl('b', 'a'); -+--------------------------+ -| nvl(Utf8("b"),Utf8("a")) | -+--------------------------+ -| b | -+--------------------------+ -``` - -#### Aliases - -- ifnull - -### `nvl2` - -Returns _expression2_ if _expression1_ is not NULL; otherwise it returns _expression3_. - -``` -nvl2(expression1, expression2, expression3) -``` - -#### Arguments - -- **expression1**: Expression to test for null. Can be a constant, column, or function, and any combination of operators. -- **expression2**: Expression to return if expr1 is not null. Can be a constant, column, or function, and any combination of operators. -- **expression3**: Expression to return if expr1 is null. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select nvl2(null, 'a', 'b'); -+--------------------------------+ -| nvl2(NULL,Utf8("a"),Utf8("b")) | -+--------------------------------+ -| b | -+--------------------------------+ -> select nvl2('data', 'a', 'b'); -+----------------------------------------+ -| nvl2(Utf8("data"),Utf8("a"),Utf8("b")) | -+----------------------------------------+ -| a | -+----------------------------------------+ -``` - -## String Functions - -- [ascii](#ascii) -- [bit_length](#bit_length) -- [btrim](#btrim) -- [char_length](#char_length) -- [character_length](#character_length) -- [chr](#chr) -- [concat](#concat) -- [concat_ws](#concat_ws) -- [contains](#contains) -- [ends_with](#ends_with) -- [find_in_set](#find_in_set) -- [initcap](#initcap) -- [instr](#instr) -- [left](#left) -- [length](#length) -- [levenshtein](#levenshtein) -- [lower](#lower) -- [lpad](#lpad) -- [ltrim](#ltrim) -- [octet_length](#octet_length) -- [position](#position) -- [repeat](#repeat) -- [replace](#replace) -- [reverse](#reverse) -- [right](#right) -- [rpad](#rpad) -- [rtrim](#rtrim) -- [split_part](#split_part) -- [starts_with](#starts_with) -- [strpos](#strpos) -- [substr](#substr) -- [substr_index](#substr_index) -- [substring](#substring) -- [substring_index](#substring_index) -- [to_hex](#to_hex) -- [translate](#translate) -- [trim](#trim) -- [upper](#upper) -- [uuid](#uuid) - -### `ascii` - -Returns the Unicode character code of the first character in a string. - -``` -ascii(str) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select ascii('abc'); -+--------------------+ -| ascii(Utf8("abc")) | -+--------------------+ -| 97 | -+--------------------+ -> select ascii('🚀'); -+-------------------+ -| ascii(Utf8("🚀")) | -+-------------------+ -| 128640 | -+-------------------+ -``` - -**Related functions**: - -- [chr](#chr) - -### `bit_length` - -Returns the bit length of a string. - -``` -bit_length(str) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select bit_length('datafusion'); -+--------------------------------+ -| bit_length(Utf8("datafusion")) | -+--------------------------------+ -| 80 | -+--------------------------------+ -``` - -**Related functions**: - -- [length](#length) -- [octet_length](#octet_length) - -### `btrim` - -Trims the specified trim string from the start and end of a string. If no trim string is provided, all whitespace is removed from the start and end of the input string. - -``` -btrim(str[, trim_str]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **trim_str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. _Default is whitespace characters._ - -#### Example - -```sql -> select btrim('__datafusion____', '_'); -+-------------------------------------------+ -| btrim(Utf8("__datafusion____"),Utf8("_")) | -+-------------------------------------------+ -| datafusion | -+-------------------------------------------+ -``` - -#### Alternative Syntax - -```sql -trim(BOTH trim_str FROM str) -``` - -```sql -trim(trim_str FROM str) -``` - -#### Aliases - -- trim - -**Related functions**: - -- [ltrim](#ltrim) -- [rtrim](#rtrim) - -### `char_length` - -_Alias of [character_length](#character_length)._ - -### `character_length` - -Returns the number of characters in a string. - -``` -character_length(str) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select character_length('Ångström'); -+------------------------------------+ -| character_length(Utf8("Ångström")) | -+------------------------------------+ -| 8 | -+------------------------------------+ -``` - -#### Aliases - -- length -- char_length - -**Related functions**: - -- [bit_length](#bit_length) -- [octet_length](#octet_length) - -### `chr` - -Returns the character with the specified ASCII or Unicode code value. - -``` -chr(expression) -``` - -#### Arguments - -- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select chr(128640); -+--------------------+ -| chr(Int64(128640)) | -+--------------------+ -| 🚀 | -+--------------------+ -``` - -**Related functions**: - -- [ascii](#ascii) - -### `concat` - -Concatenates multiple strings together. - -``` -concat(str[, ..., str_n]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **str_n**: Subsequent string expressions to concatenate. - -#### Example - -```sql -> select concat('data', 'f', 'us', 'ion'); -+-------------------------------------------------------+ -| concat(Utf8("data"),Utf8("f"),Utf8("us"),Utf8("ion")) | -+-------------------------------------------------------+ -| datafusion | -+-------------------------------------------------------+ -``` - -**Related functions**: - -- [concat_ws](#concat_ws) - -### `concat_ws` - -Concatenates multiple strings together with a specified separator. - -``` -concat_ws(separator, str[, ..., str_n]) -``` - -#### Arguments - -- **separator**: Separator to insert between concatenated strings. -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **str_n**: Subsequent string expressions to concatenate. - -#### Example - -```sql -> select concat_ws('_', 'data', 'fusion'); -+--------------------------------------------------+ -| concat_ws(Utf8("_"),Utf8("data"),Utf8("fusion")) | -+--------------------------------------------------+ -| data_fusion | -+--------------------------------------------------+ -``` - -**Related functions**: - -- [concat](#concat) - -### `contains` - -Return true if search_str is found within string (case-sensitive). - -``` -contains(str, search_str) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **search_str**: The string to search for in str. - -#### Example - -```sql -> select contains('the quick brown fox', 'row'); -+---------------------------------------------------+ -| contains(Utf8("the quick brown fox"),Utf8("row")) | -+---------------------------------------------------+ -| true | -+---------------------------------------------------+ -``` - -### `ends_with` - -Tests if a string ends with a substring. - -``` -ends_with(str, substr) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **substr**: Substring to test for. - -#### Example - -```sql -> select ends_with('datafusion', 'soin'); -+--------------------------------------------+ -| ends_with(Utf8("datafusion"),Utf8("soin")) | -+--------------------------------------------+ -| false | -+--------------------------------------------+ -> select ends_with('datafusion', 'sion'); -+--------------------------------------------+ -| ends_with(Utf8("datafusion"),Utf8("sion")) | -+--------------------------------------------+ -| true | -+--------------------------------------------+ -``` - -### `find_in_set` - -Returns a value in the range of 1 to N if the string str is in the string list strlist consisting of N substrings. - -``` -find_in_set(str, strlist) -``` - -#### Arguments - -- **str**: String expression to find in strlist. -- **strlist**: A string list is a string composed of substrings separated by , characters. - -#### Example - -```sql -> select find_in_set('b', 'a,b,c,d'); -+----------------------------------------+ -| find_in_set(Utf8("b"),Utf8("a,b,c,d")) | -+----------------------------------------+ -| 2 | -+----------------------------------------+ -``` - -### `initcap` - -Capitalizes the first character in each word in the input string. Words are delimited by non-alphanumeric characters. - -``` -initcap(str) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select initcap('apache datafusion'); -+------------------------------------+ -| initcap(Utf8("apache datafusion")) | -+------------------------------------+ -| Apache Datafusion | -+------------------------------------+ -``` - -**Related functions**: - -- [lower](#lower) -- [upper](#upper) - -### `instr` - -_Alias of [strpos](#strpos)._ - -### `left` - -Returns a specified number of characters from the left side of a string. - -``` -left(str, n) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **n**: Number of characters to return. - -#### Example - -```sql -> select left('datafusion', 4); -+-----------------------------------+ -| left(Utf8("datafusion"),Int64(4)) | -+-----------------------------------+ -| data | -+-----------------------------------+ -``` - -**Related functions**: - -- [right](#right) - -### `length` - -_Alias of [character_length](#character_length)._ - -### `levenshtein` - -Returns the [`Levenshtein distance`](https://en.wikipedia.org/wiki/Levenshtein_distance) between the two given strings. - -``` -levenshtein(str1, str2) -``` - -#### Arguments - -- **str1**: String expression to compute Levenshtein distance with str2. -- **str2**: String expression to compute Levenshtein distance with str1. - -#### Example - -```sql -> select levenshtein('kitten', 'sitting'); -+---------------------------------------------+ -| levenshtein(Utf8("kitten"),Utf8("sitting")) | -+---------------------------------------------+ -| 3 | -+---------------------------------------------+ -``` - -### `lower` - -Converts a string to lower-case. - -``` -lower(str) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select lower('Ångström'); -+-------------------------+ -| lower(Utf8("Ångström")) | -+-------------------------+ -| ångström | -+-------------------------+ -``` - -**Related functions**: - -- [initcap](#initcap) -- [upper](#upper) - -### `lpad` - -Pads the left side of a string with another string to a specified string length. - -``` -lpad(str, n[, padding_str]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **n**: String length to pad to. -- **padding_str**: Optional string expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._ - -#### Example - -```sql -> select lpad('Dolly', 10, 'hello'); -+---------------------------------------------+ -| lpad(Utf8("Dolly"),Int64(10),Utf8("hello")) | -+---------------------------------------------+ -| helloDolly | -+---------------------------------------------+ -``` - -**Related functions**: - -- [rpad](#rpad) - -### `ltrim` - -Trims the specified trim string from the beginning of a string. If no trim string is provided, all whitespace is removed from the start of the input string. - -``` -ltrim(str[, trim_str]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **trim_str**: String expression to trim from the beginning of the input string. Can be a constant, column, or function, and any combination of arithmetic operators. _Default is whitespace characters._ - -#### Example - -```sql -> select ltrim(' datafusion '); -+-------------------------------+ -| ltrim(Utf8(" datafusion ")) | -+-------------------------------+ -| datafusion | -+-------------------------------+ -> select ltrim('___datafusion___', '_'); -+-------------------------------------------+ -| ltrim(Utf8("___datafusion___"),Utf8("_")) | -+-------------------------------------------+ -| datafusion___ | -+-------------------------------------------+ -``` - -#### Alternative Syntax - -```sql -trim(LEADING trim_str FROM str) -``` - -**Related functions**: - -- [btrim](#btrim) -- [rtrim](#rtrim) - -### `octet_length` - -Returns the length of a string in bytes. - -``` -octet_length(str) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select octet_length('Ångström'); -+--------------------------------+ -| octet_length(Utf8("Ångström")) | -+--------------------------------+ -| 10 | -+--------------------------------+ -``` - -**Related functions**: - -- [bit_length](#bit_length) -- [length](#length) - -### `position` - -_Alias of [strpos](#strpos)._ - -### `repeat` - -Returns a string with an input string repeated a specified number. - -``` -repeat(str, n) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **n**: Number of times to repeat the input string. - -#### Example - -```sql -> select repeat('data', 3); -+-------------------------------+ -| repeat(Utf8("data"),Int64(3)) | -+-------------------------------+ -| datadatadata | -+-------------------------------+ -``` - -### `replace` - -Replaces all occurrences of a specified substring in a string with a new substring. - -``` -replace(str, substr, replacement) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **substr**: Substring expression to replace in the input string. Substring expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **replacement**: Replacement substring expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select replace('ABabbaBA', 'ab', 'cd'); -+-------------------------------------------------+ -| replace(Utf8("ABabbaBA"),Utf8("ab"),Utf8("cd")) | -+-------------------------------------------------+ -| ABcdbaBA | -+-------------------------------------------------+ -``` - -### `reverse` - -Reverses the character order of a string. - -``` -reverse(str) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select reverse('datafusion'); -+-----------------------------+ -| reverse(Utf8("datafusion")) | -+-----------------------------+ -| noisufatad | -+-----------------------------+ -``` - -### `right` - -Returns a specified number of characters from the right side of a string. - -``` -right(str, n) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **n**: Number of characters to return - -#### Example - -```sql -> select right('datafusion', 6); -+------------------------------------+ -| right(Utf8("datafusion"),Int64(6)) | -+------------------------------------+ -| fusion | -+------------------------------------+ -``` - -**Related functions**: - -- [left](#left) - -### `rpad` - -Pads the right side of a string with another string to a specified string length. - -``` -rpad(str, n[, padding_str]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **n**: String length to pad to. -- **padding_str**: String expression to pad with. Can be a constant, column, or function, and any combination of string operators. _Default is a space._ - -#### Example - -```sql -> select rpad('datafusion', 20, '_-'); -+-----------------------------------------------+ -| rpad(Utf8("datafusion"),Int64(20),Utf8("_-")) | -+-----------------------------------------------+ -| datafusion_-_-_-_-_- | -+-----------------------------------------------+ -``` - -**Related functions**: - -- [lpad](#lpad) - -### `rtrim` - -Trims the specified trim string from the end of a string. If no trim string is provided, all whitespace is removed from the end of the input string. - -``` -rtrim(str[, trim_str]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **trim_str**: String expression to trim from the end of the input string. Can be a constant, column, or function, and any combination of arithmetic operators. _Default is whitespace characters._ - -#### Example - -```sql -> select rtrim(' datafusion '); -+-------------------------------+ -| rtrim(Utf8(" datafusion ")) | -+-------------------------------+ -| datafusion | -+-------------------------------+ -> select rtrim('___datafusion___', '_'); -+-------------------------------------------+ -| rtrim(Utf8("___datafusion___"),Utf8("_")) | -+-------------------------------------------+ -| ___datafusion | -+-------------------------------------------+ -``` - -#### Alternative Syntax - -```sql -trim(TRAILING trim_str FROM str) -``` - -**Related functions**: - -- [btrim](#btrim) -- [ltrim](#ltrim) - -### `split_part` - -Splits a string based on a specified delimiter and returns the substring in the specified position. - -``` -split_part(str, delimiter, pos) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **delimiter**: String or character to split on. -- **pos**: Position of the part to return. - -#### Example - -```sql -> select split_part('1.2.3.4.5', '.', 3); -+--------------------------------------------------+ -| split_part(Utf8("1.2.3.4.5"),Utf8("."),Int64(3)) | -+--------------------------------------------------+ -| 3 | -+--------------------------------------------------+ -``` - -### `starts_with` - -Tests if a string starts with a substring. - -``` -starts_with(str, substr) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **substr**: Substring to test for. - -#### Example - -```sql -> select starts_with('datafusion','data'); -+----------------------------------------------+ -| starts_with(Utf8("datafusion"),Utf8("data")) | -+----------------------------------------------+ -| true | -+----------------------------------------------+ -``` - -### `strpos` - -Returns the starting position of a specified substring in a string. Positions begin at 1. If the substring does not exist in the string, the function returns 0. - -``` -strpos(str, substr) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **substr**: Substring expression to search for. - -#### Example - -```sql -> select strpos('datafusion', 'fus'); -+----------------------------------------+ -| strpos(Utf8("datafusion"),Utf8("fus")) | -+----------------------------------------+ -| 5 | -+----------------------------------------+ -``` - -#### Alternative Syntax - -```sql -position(substr in origstr) -``` - -#### Aliases - -- instr -- position - -### `substr` - -Extracts a substring of a specified number of characters from a specific starting position in a string. - -``` -substr(str, start_pos[, length]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **start_pos**: Character position to start the substring at. The first character in the string has a position of 1. -- **length**: Number of characters to extract. If not specified, returns the rest of the string after the start position. - -#### Example - -```sql -> select substr('datafusion', 5, 3); -+----------------------------------------------+ -| substr(Utf8("datafusion"),Int64(5),Int64(3)) | -+----------------------------------------------+ -| fus | -+----------------------------------------------+ -``` - -#### Alternative Syntax - -```sql -substring(str from start_pos for length) -``` - -#### Aliases - -- substring - -### `substr_index` - -Returns the substring from str before count occurrences of the delimiter delim. -If count is positive, everything to the left of the final delimiter (counting from the left) is returned. -If count is negative, everything to the right of the final delimiter (counting from the right) is returned. - -``` -substr_index(str, delim, count) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **delim**: The string to find in str to split str. -- **count**: The number of times to search for the delimiter. Can be either a positive or negative number. - -#### Example - -```sql -> select substr_index('www.apache.org', '.', 1); -+---------------------------------------------------------+ -| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(1)) | -+---------------------------------------------------------+ -| www | -+---------------------------------------------------------+ -> select substr_index('www.apache.org', '.', -1); -+----------------------------------------------------------+ -| substr_index(Utf8("www.apache.org"),Utf8("."),Int64(-1)) | -+----------------------------------------------------------+ -| org | -+----------------------------------------------------------+ -``` - -#### Aliases - -- substring_index - -### `substring` - -_Alias of [substr](#substr)._ - -### `substring_index` - -_Alias of [substr_index](#substr_index)._ - -### `to_hex` - -Converts an integer to a hexadecimal string. - -``` -to_hex(int) -``` - -#### Arguments - -- **int**: Integer expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select to_hex(12345689); -+-------------------------+ -| to_hex(Int64(12345689)) | -+-------------------------+ -| bc6159 | -+-------------------------+ -``` - -### `translate` - -Translates characters in a string to specified translation characters. - -``` -translate(str, chars, translation) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **chars**: Characters to translate. -- **translation**: Translation characters. Translation characters replace only characters at the same position in the **chars** string. - -#### Example - -```sql -> select translate('twice', 'wic', 'her'); -+--------------------------------------------------+ -| translate(Utf8("twice"),Utf8("wic"),Utf8("her")) | -+--------------------------------------------------+ -| there | -+--------------------------------------------------+ -``` - -### `trim` - -_Alias of [btrim](#btrim)._ - -### `upper` - -Converts a string to upper-case. - -``` -upper(str) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select upper('dataFusion'); -+---------------------------+ -| upper(Utf8("dataFusion")) | -+---------------------------+ -| DATAFUSION | -+---------------------------+ -``` - -**Related functions**: - -- [initcap](#initcap) -- [lower](#lower) - -### `uuid` - -Returns [`UUID v4`]() string value which is unique per row. - -``` -uuid() -``` - -#### Example - -```sql -> select uuid(); -+--------------------------------------+ -| uuid() | -+--------------------------------------+ -| 6ec17ef8-1934-41cc-8d59-d0c8f9eea1f0 | -+--------------------------------------+ -``` - -## Binary String Functions - -- [decode](#decode) -- [encode](#encode) - -### `decode` - -Decode binary data from textual representation in string. - -``` -decode(expression, format) -``` - -#### Arguments - -- **expression**: Expression containing encoded string data -- **format**: Same arguments as [encode](#encode) - -**Related functions**: - -- [encode](#encode) - -### `encode` - -Encode binary data into a textual representation. - -``` -encode(expression, format) -``` - -#### Arguments - -- **expression**: Expression containing string or binary data -- **format**: Supported formats are: `base64`, `hex` - -**Related functions**: - -- [decode](#decode) - -## Regular Expression Functions - -Apache DataFusion uses a [PCRE-like](https://en.wikibooks.org/wiki/Regular_Expressions/Perl-Compatible_Regular_Expressions) -regular expression [syntax](https://docs.rs/regex/latest/regex/#syntax) -(minus support for several features including look-around and backreferences). -The following regular expression functions are supported: - -- [regexp_count](#regexp_count) -- [regexp_like](#regexp_like) -- [regexp_match](#regexp_match) -- [regexp_replace](#regexp_replace) - -### `regexp_count` - -Returns the number of matches that a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has in a string. - -``` -regexp_count(str, regexp[, start, flags]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **regexp**: Regular expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **start**: - **start**: Optional start position (the first position is 1) to search for the regular expression. Can be a constant, column, or function. -- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - - **i**: case-insensitive: letters match both upper and lower case - - **m**: multi-line mode: ^ and $ match begin/end of line - - **s**: allow . to match \n - - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*? - -#### Example - -```sql -> select regexp_count('abcAbAbc', 'abc', 2, 'i'); -+---------------------------------------------------------------+ -| regexp_count(Utf8("abcAbAbc"),Utf8("abc"),Int64(2),Utf8("i")) | -+---------------------------------------------------------------+ -| 1 | -+---------------------------------------------------------------+ -``` - -### `regexp_like` - -Returns true if a [regular expression](https://docs.rs/regex/latest/regex/#syntax) has at least one match in a string, false otherwise. - -``` -regexp_like(str, regexp[, flags]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **regexp**: Regular expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - - **i**: case-insensitive: letters match both upper and lower case - - **m**: multi-line mode: ^ and $ match begin/end of line - - **s**: allow . to match \n - - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*? - -#### Example - -```sql -select regexp_like('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); -+--------------------------------------------------------+ -| regexp_like(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | -+--------------------------------------------------------+ -| true | -+--------------------------------------------------------+ -SELECT regexp_like('aBc', '(b|d)', 'i'); -+--------------------------------------------------+ -| regexp_like(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) | -+--------------------------------------------------+ -| true | -+--------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) - -### `regexp_match` - -Returns the first [regular expression](https://docs.rs/regex/latest/regex/#syntax) matches in a string. - -``` -regexp_match(str, regexp[, flags]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **regexp**: Regular expression to match against. - Can be a constant, column, or function. -- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: - - **i**: case-insensitive: letters match both upper and lower case - - **m**: multi-line mode: ^ and $ match begin/end of line - - **s**: allow . to match \n - - **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used - - **U**: swap the meaning of x* and x*? - -#### Example - -```sql - > select regexp_match('Köln', '[a-zA-Z]ö[a-zA-Z]{2}'); - +---------------------------------------------------------+ - | regexp_match(Utf8("Köln"),Utf8("[a-zA-Z]ö[a-zA-Z]{2}")) | - +---------------------------------------------------------+ - | [Köln] | - +---------------------------------------------------------+ - SELECT regexp_match('aBc', '(b|d)', 'i'); - +---------------------------------------------------+ - | regexp_match(Utf8("aBc"),Utf8("(b|d)"),Utf8("i")) | - +---------------------------------------------------+ - | [B] | - +---------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) - -### `regexp_replace` - -Replaces substrings in a string that match a [regular expression](https://docs.rs/regex/latest/regex/#syntax). - -``` -regexp_replace(str, regexp, replacement[, flags]) -``` - -#### Arguments - -- **str**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **regexp**: Regular expression to match against. - Can be a constant, column, or function. -- **replacement**: Replacement string expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **flags**: Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: -- **g**: (global) Search globally and don't return after the first match -- **i**: case-insensitive: letters match both upper and lower case -- **m**: multi-line mode: ^ and $ match begin/end of line -- **s**: allow . to match \n -- **R**: enables CRLF mode: when multi-line mode is enabled, \r\n is used -- **U**: swap the meaning of x* and x*? - -#### Example - -```sql -> select regexp_replace('foobarbaz', 'b(..)', 'X\\1Y', 'g'); -+------------------------------------------------------------------------+ -| regexp_replace(Utf8("foobarbaz"),Utf8("b(..)"),Utf8("X\1Y"),Utf8("g")) | -+------------------------------------------------------------------------+ -| fooXarYXazY | -+------------------------------------------------------------------------+ -SELECT regexp_replace('aBc', '(b|d)', 'Ab\\1a', 'i'); -+-------------------------------------------------------------------+ -| regexp_replace(Utf8("aBc"),Utf8("(b|d)"),Utf8("Ab\1a"),Utf8("i")) | -+-------------------------------------------------------------------+ -| aAbBac | -+-------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/regexp.rs) - -## Time and Date Functions - -- [current_date](#current_date) -- [current_time](#current_time) -- [current_timestamp](#current_timestamp) -- [date_bin](#date_bin) -- [date_format](#date_format) -- [date_part](#date_part) -- [date_trunc](#date_trunc) -- [datepart](#datepart) -- [datetrunc](#datetrunc) -- [from_unixtime](#from_unixtime) -- [make_date](#make_date) -- [now](#now) -- [to_char](#to_char) -- [to_date](#to_date) -- [to_local_time](#to_local_time) -- [to_timestamp](#to_timestamp) -- [to_timestamp_micros](#to_timestamp_micros) -- [to_timestamp_millis](#to_timestamp_millis) -- [to_timestamp_nanos](#to_timestamp_nanos) -- [to_timestamp_seconds](#to_timestamp_seconds) -- [to_unixtime](#to_unixtime) -- [today](#today) - -### `current_date` - -Returns the current UTC date. - -The `current_date()` return value is determined at query time and will return the same date, no matter when in the query plan the function executes. - -``` -current_date() -``` - -#### Aliases - -- today - -### `current_time` - -Returns the current UTC time. - -The `current_time()` return value is determined at query time and will return the same time, no matter when in the query plan the function executes. - -``` -current_time() -``` - -### `current_timestamp` - -_Alias of [now](#now)._ - -### `date_bin` - -Calculates time intervals and returns the start of the interval nearest to the specified timestamp. Use `date_bin` to downsample time series data by grouping rows into time-based "bins" or "windows" and applying an aggregate or selector function to each window. - -For example, if you "bin" or "window" data into 15 minute intervals, an input timestamp of `2023-01-01T18:18:18Z` will be updated to the start time of the 15 minute bin it is in: `2023-01-01T18:15:00Z`. - -``` -date_bin(interval, expression, origin-timestamp) -``` - -#### Arguments - -- **interval**: Bin interval. -- **expression**: Time expression to operate on. Can be a constant, column, or function. -- **origin-timestamp**: Optional. Starting point used to determine bin boundaries. If not specified defaults 1970-01-01T00:00:00Z (the UNIX epoch in UTC). - -The following intervals are supported: - -- nanoseconds -- microseconds -- milliseconds -- seconds -- minutes -- hours -- days -- weeks -- months -- years -- century - -### `date_format` - -_Alias of [to_char](#to_char)._ - -### `date_part` - -Returns the specified part of the date as an integer. - -``` -date_part(part, expression) -``` - -#### Arguments - -- **part**: Part of the date to return. The following date parts are supported: - - - year - - quarter (emits value in inclusive range [1, 4] based on which quartile of the year the date is in) - - month - - week (week of the year) - - day (day of the month) - - hour - - minute - - second - - millisecond - - microsecond - - nanosecond - - dow (day of the week) - - doy (day of the year) - - epoch (seconds since Unix epoch) - -- **expression**: Time expression to operate on. Can be a constant, column, or function. - -#### Alternative Syntax - -```sql -extract(field FROM source) -``` - -#### Aliases - -- datepart - -### `date_trunc` - -Truncates a timestamp value to a specified precision. - -``` -date_trunc(precision, expression) -``` - -#### Arguments - -- **precision**: Time precision to truncate to. The following precisions are supported: - - - year / YEAR - - quarter / QUARTER - - month / MONTH - - week / WEEK - - day / DAY - - hour / HOUR - - minute / MINUTE - - second / SECOND - -- **expression**: Time expression to operate on. Can be a constant, column, or function. - -#### Aliases - -- datetrunc - -### `datepart` - -_Alias of [date_part](#date_part)._ - -### `datetrunc` - -_Alias of [date_trunc](#date_trunc)._ - -### `from_unixtime` - -Converts an integer to RFC3339 timestamp format (`YYYY-MM-DDT00:00:00.000000000Z`). Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`) return the corresponding timestamp. - -``` -from_unixtime(expression) -``` - -#### Arguments - -- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. - -### `make_date` - -Make a date from year/month/day component parts. - -``` -make_date(year, month, day) -``` - -#### Arguments - -- **year**: Year to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators. -- **month**: Month to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators. -- **day**: Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators. - -#### Example - -```sql -> select make_date(2023, 1, 31); -+-------------------------------------------+ -| make_date(Int64(2023),Int64(1),Int64(31)) | -+-------------------------------------------+ -| 2023-01-31 | -+-------------------------------------------+ -> select make_date('2023', '01', '31'); -+-----------------------------------------------+ -| make_date(Utf8("2023"),Utf8("01"),Utf8("31")) | -+-----------------------------------------------+ -| 2023-01-31 | -+-----------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/make_date.rs) - -### `now` - -Returns the current UTC timestamp. - -The `now()` return value is determined at query time and will return the same timestamp, no matter when in the query plan the function executes. - -``` -now() -``` - -#### Aliases - -- current_timestamp - -### `to_char` - -Returns a string representation of a date, time, timestamp or duration based on a [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html). Unlike the PostgreSQL equivalent of this function numerical formatting is not supported. - -``` -to_char(expression, format) -``` - -#### Arguments - -- **expression**: Expression to operate on. Can be a constant, column, or function that results in a date, time, timestamp or duration. -- **format**: A [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) string to use to convert the expression. -- **day**: Day to use when making the date. Can be a constant, column or function, and any combination of arithmetic operators. - -#### Example - -```sql -> select to_char('2023-03-01'::date, '%d-%m-%Y'); -+----------------------------------------------+ -| to_char(Utf8("2023-03-01"),Utf8("%d-%m-%Y")) | -+----------------------------------------------+ -| 01-03-2023 | -+----------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_char.rs) - -#### Aliases - -- date_format - -### `to_date` - -Converts a value to a date (`YYYY-MM-DD`). -Supports strings, integer and double types as input. -Strings are parsed as YYYY-MM-DD (e.g. '2023-07-20') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. -Integers and doubles are interpreted as days since the unix epoch (`1970-01-01T00:00:00Z`). -Returns the corresponding date. - -Note: `to_date` returns Date32, which represents its values as the number of days since unix epoch(`1970-01-01`) stored as signed 32 bit value. The largest supported date value is `9999-12-31`. - -``` -to_date('2017-05-31', '%Y-%m-%d') -``` - -#### Arguments - -- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order - they appear with the first successful one being returned. If none of the formats successfully parse the expression - an error will be returned. - -#### Example - -```sql -> select to_date('2023-01-31'); -+-----------------------------+ -| to_date(Utf8("2023-01-31")) | -+-----------------------------+ -| 2023-01-31 | -+-----------------------------+ -> select to_date('2023/01/31', '%Y-%m-%d', '%Y/%m/%d'); -+---------------------------------------------------------------+ -| to_date(Utf8("2023/01/31"),Utf8("%Y-%m-%d"),Utf8("%Y/%m/%d")) | -+---------------------------------------------------------------+ -| 2023-01-31 | -+---------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_date.rs) - -### `to_local_time` - -Converts a timestamp with a timezone to a timestamp without a timezone (with no offset or timezone information). This function handles daylight saving time changes. - -``` -to_local_time(expression) -``` - -#### Arguments - -- **expression**: Time expression to operate on. Can be a constant, column, or function. - -#### Example - -```sql -> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp); -+---------------------------------------------+ -| to_local_time(Utf8("2024-04-01T00:00:20Z")) | -+---------------------------------------------+ -| 2024-04-01T00:00:20 | -+---------------------------------------------+ - -> SELECT to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels'); -+---------------------------------------------+ -| to_local_time(Utf8("2024-04-01T00:00:20Z")) | -+---------------------------------------------+ -| 2024-04-01T00:00:20 | -+---------------------------------------------+ - -> SELECT - time, - arrow_typeof(time) as type, - to_local_time(time) as to_local_time, - arrow_typeof(to_local_time(time)) as to_local_time_type -FROM ( - SELECT '2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels' AS time -); -+---------------------------+------------------------------------------------+---------------------+-----------------------------+ -| time | type | to_local_time | to_local_time_type | -+---------------------------+------------------------------------------------+---------------------+-----------------------------+ -| 2024-04-01T00:00:20+02:00 | Timestamp(Nanosecond, Some("Europe/Brussels")) | 2024-04-01T00:00:20 | Timestamp(Nanosecond, None) | -+---------------------------+------------------------------------------------+---------------------+-----------------------------+ - -# combine `to_local_time()` with `date_bin()` to bin on boundaries in the timezone rather -# than UTC boundaries - -> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AS date_bin; -+---------------------+ -| date_bin | -+---------------------+ -| 2024-04-01T00:00:00 | -+---------------------+ - -> SELECT date_bin(interval '1 day', to_local_time('2024-04-01T00:00:20Z'::timestamp AT TIME ZONE 'Europe/Brussels')) AT TIME ZONE 'Europe/Brussels' AS date_bin_with_timezone; -+---------------------------+ -| date_bin_with_timezone | -+---------------------------+ -| 2024-04-01T00:00:00+02:00 | -+---------------------------+ -``` - -### `to_timestamp` - -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00Z`). Supports strings, integer, unsigned integer, and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats] are provided. Integers, unsigned integers, and doubles are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. - -Note: `to_timestamp` returns `Timestamp(Nanosecond)`. The supported range for integer input is between `-9223372037` and `9223372036`. Supported range for string input is between `1677-09-21T00:12:44.0` and `2262-04-11T23:47:16.0`. Please use `to_timestamp_seconds` for the input outside of supported bounds. - -``` -to_timestamp(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. - -#### Example - -```sql -> select to_timestamp('2023-01-31T09:26:56.123456789-05:00'); -+-----------------------------------------------------------+ -| to_timestamp(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+-----------------------------------------------------------+ -| 2023-01-31T14:26:56.123456789 | -+-----------------------------------------------------------+ -> select to_timestamp('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+--------------------------------------------------------------------------------------------------------+ -| to_timestamp(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+--------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123456789 | -+--------------------------------------------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) - -### `to_timestamp_micros` - -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as microseconds since the unix epoch (`1970-01-01T00:00:00Z`) Returns the corresponding timestamp. - -``` -to_timestamp_micros(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. - -#### Example - -```sql -> select to_timestamp_micros('2023-01-31T09:26:56.123456789-05:00'); -+------------------------------------------------------------------+ -| to_timestamp_micros(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+------------------------------------------------------------------+ -| 2023-01-31T14:26:56.123456 | -+------------------------------------------------------------------+ -> select to_timestamp_micros('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+---------------------------------------------------------------------------------------------------------------+ -| to_timestamp_micros(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+---------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123456 | -+---------------------------------------------------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) - -### `to_timestamp_millis` - -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. Integers and unsigned integers are interpreted as milliseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. - -``` -to_timestamp_millis(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. - -#### Example - -```sql -> select to_timestamp_millis('2023-01-31T09:26:56.123456789-05:00'); -+------------------------------------------------------------------+ -| to_timestamp_millis(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+------------------------------------------------------------------+ -| 2023-01-31T14:26:56.123 | -+------------------------------------------------------------------+ -> select to_timestamp_millis('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+---------------------------------------------------------------------------------------------------------------+ -| to_timestamp_millis(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+---------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123 | -+---------------------------------------------------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) - -### `to_timestamp_nanos` - -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000000000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as nanoseconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. - -``` -to_timestamp_nanos(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. - -#### Example - -```sql -> select to_timestamp_nanos('2023-01-31T09:26:56.123456789-05:00'); -+-----------------------------------------------------------------+ -| to_timestamp_nanos(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+-----------------------------------------------------------------+ -| 2023-01-31T14:26:56.123456789 | -+-----------------------------------------------------------------+ -> select to_timestamp_nanos('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+--------------------------------------------------------------------------------------------------------------+ -| to_timestamp_nanos(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+--------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00.123456789 | -+---------------------------------------------------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) - -### `to_timestamp_seconds` - -Converts a value to a timestamp (`YYYY-MM-DDT00:00:00.000Z`). Supports strings, integer, and unsigned integer types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html)s are provided. Integers and unsigned integers are interpreted as seconds since the unix epoch (`1970-01-01T00:00:00Z`). Returns the corresponding timestamp. - -``` -to_timestamp_seconds(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. - -#### Example - -```sql -> select to_timestamp_seconds('2023-01-31T09:26:56.123456789-05:00'); -+-------------------------------------------------------------------+ -| to_timestamp_seconds(Utf8("2023-01-31T09:26:56.123456789-05:00")) | -+-------------------------------------------------------------------+ -| 2023-01-31T14:26:56 | -+-------------------------------------------------------------------+ -> select to_timestamp_seconds('03:59:00.123456789 05-17-2023', '%c', '%+', '%H:%M:%S%.f %m-%d-%Y'); -+----------------------------------------------------------------------------------------------------------------+ -| to_timestamp_seconds(Utf8("03:59:00.123456789 05-17-2023"),Utf8("%c"),Utf8("%+"),Utf8("%H:%M:%S%.f %m-%d-%Y")) | -+----------------------------------------------------------------------------------------------------------------+ -| 2023-05-17T03:59:00 | -+----------------------------------------------------------------------------------------------------------------+ -``` - -Additional examples can be found [here](https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/to_timestamp.rs) - -### `to_unixtime` - -Converts a value to seconds since the unix epoch (`1970-01-01T00:00:00Z`). Supports strings, dates, timestamps and double types as input. Strings are parsed as RFC3339 (e.g. '2023-07-20T05:44:00') if no [Chrono formats](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) are provided. - -``` -to_unixtime(expression[, ..., format_n]) -``` - -#### Arguments - -- **expression**: Expression to operate on. Can be a constant, column, or function, and any combination of arithmetic operators. -- **format_n**: Optional [Chrono format](https://docs.rs/chrono/latest/chrono/format/strftime/index.html) strings to use to parse the expression. Formats will be tried in the order they appear with the first successful one being returned. If none of the formats successfully parse the expression an error will be returned. - -#### Example - -```sql -> select to_unixtime('2020-09-08T12:00:00+00:00'); -+------------------------------------------------+ -| to_unixtime(Utf8("2020-09-08T12:00:00+00:00")) | -+------------------------------------------------+ -| 1599566400 | -+------------------------------------------------+ -> select to_unixtime('01-14-2023 01:01:30+05:30', '%q', '%d-%m-%Y %H/%M/%S', '%+', '%m-%d-%Y %H:%M:%S%#z'); -+-----------------------------------------------------------------------------------------------------------------------------+ -| to_unixtime(Utf8("01-14-2023 01:01:30+05:30"),Utf8("%q"),Utf8("%d-%m-%Y %H/%M/%S"),Utf8("%+"),Utf8("%m-%d-%Y %H:%M:%S%#z")) | -+-----------------------------------------------------------------------------------------------------------------------------+ -| 1673638290 | -+-----------------------------------------------------------------------------------------------------------------------------+ -``` - -### `today` - -_Alias of [current_date](#current_date)._ - -## Array Functions - -- [array_any_value](#array_any_value) -- [array_append](#array_append) -- [array_cat](#array_cat) -- [array_concat](#array_concat) -- [array_contains](#array_contains) -- [array_dims](#array_dims) -- [array_distance](#array_distance) -- [array_distinct](#array_distinct) -- [array_element](#array_element) -- [array_empty](#array_empty) -- [array_except](#array_except) -- [array_extract](#array_extract) -- [array_has](#array_has) -- [array_has_all](#array_has_all) -- [array_has_any](#array_has_any) -- [array_indexof](#array_indexof) -- [array_intersect](#array_intersect) -- [array_join](#array_join) -- [array_length](#array_length) -- [array_ndims](#array_ndims) -- [array_pop_back](#array_pop_back) -- [array_pop_front](#array_pop_front) -- [array_position](#array_position) -- [array_positions](#array_positions) -- [array_prepend](#array_prepend) -- [array_push_back](#array_push_back) -- [array_push_front](#array_push_front) -- [array_remove](#array_remove) -- [array_remove_all](#array_remove_all) -- [array_remove_n](#array_remove_n) -- [array_repeat](#array_repeat) -- [array_replace](#array_replace) -- [array_replace_all](#array_replace_all) -- [array_replace_n](#array_replace_n) -- [array_resize](#array_resize) -- [array_reverse](#array_reverse) -- [array_slice](#array_slice) -- [array_sort](#array_sort) -- [array_to_string](#array_to_string) -- [array_union](#array_union) -- [cardinality](#cardinality) -- [empty](#empty) -- [flatten](#flatten) -- [generate_series](#generate_series) -- [list_any_value](#list_any_value) -- [list_append](#list_append) -- [list_cat](#list_cat) -- [list_concat](#list_concat) -- [list_contains](#list_contains) -- [list_dims](#list_dims) -- [list_distance](#list_distance) -- [list_distinct](#list_distinct) -- [list_element](#list_element) -- [list_empty](#list_empty) -- [list_except](#list_except) -- [list_extract](#list_extract) -- [list_has](#list_has) -- [list_has_all](#list_has_all) -- [list_has_any](#list_has_any) -- [list_indexof](#list_indexof) -- [list_intersect](#list_intersect) -- [list_join](#list_join) -- [list_length](#list_length) -- [list_ndims](#list_ndims) -- [list_pop_back](#list_pop_back) -- [list_pop_front](#list_pop_front) -- [list_position](#list_position) -- [list_positions](#list_positions) -- [list_prepend](#list_prepend) -- [list_push_back](#list_push_back) -- [list_push_front](#list_push_front) -- [list_remove](#list_remove) -- [list_remove_all](#list_remove_all) -- [list_remove_n](#list_remove_n) -- [list_repeat](#list_repeat) -- [list_replace](#list_replace) -- [list_replace_all](#list_replace_all) -- [list_replace_n](#list_replace_n) -- [list_resize](#list_resize) -- [list_reverse](#list_reverse) -- [list_slice](#list_slice) -- [list_sort](#list_sort) -- [list_to_string](#list_to_string) -- [list_union](#list_union) -- [make_array](#make_array) -- [make_list](#make_list) -- [range](#range) -- [string_to_array](#string_to_array) -- [string_to_list](#string_to_list) - -### `array_any_value` - -Extracts the element with the index n from the array. - -``` -array_element(array, index) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **index**: Index to extract the element from the array. - -#### Example - -```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ -``` - -#### Aliases - -- list_any_value - -### `array_append` - -Appends an element to the end of an array. - -``` -array_append(array, element) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Element to append to the array. - -#### Example - -```sql -> select array_append([1, 2, 3], 4); -+--------------------------------------+ -| array_append(List([1,2,3]),Int64(4)) | -+--------------------------------------+ -| [1, 2, 3, 4] | -+--------------------------------------+ -``` - -#### Aliases - -- list_append -- array_push_back -- list_push_back - -### `array_cat` - -_Alias of [array_concat](#array_concat)._ - -### `array_concat` - -Appends an element to the end of an array. - -``` -array_append(array, element) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Element to append to the array. - -#### Example - -```sql -> select array_append([1, 2, 3], 4); -+--------------------------------------+ -| array_append(List([1,2,3]),Int64(4)) | -+--------------------------------------+ -| [1, 2, 3, 4] | -+--------------------------------------+ -``` - -#### Aliases - -- array_cat -- list_concat -- list_cat - -### `array_contains` - -_Alias of [array_has](#array_has)._ - -### `array_dims` - -Returns an array of the array's dimensions. - -``` -array_dims(array) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_dims([[1, 2, 3], [4, 5, 6]]); -+---------------------------------+ -| array_dims(List([1,2,3,4,5,6])) | -+---------------------------------+ -| [2, 3] | -+---------------------------------+ -``` - -#### Aliases - -- list_dims - -### `array_distance` - -Returns the Euclidean distance between two input arrays of equal length. - -``` -array_distance(array1, array2) -``` - -#### Arguments - -- **array1**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **array2**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_distance([1, 2], [1, 4]); -+------------------------------------+ -| array_distance(List([1,2], [1,4])) | -+------------------------------------+ -| 2.0 | -+------------------------------------+ -``` - -#### Aliases - -- list_distance - -### `array_distinct` - -Returns distinct values from the array after removing duplicates. - -``` -array_distinct(array) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_distinct([1, 3, 2, 3, 1, 2, 4]); -+---------------------------------+ -| array_distinct(List([1,2,3,4])) | -+---------------------------------+ -| [1, 2, 3, 4] | -+---------------------------------+ -``` - -#### Aliases - -- list_distinct - -### `array_element` - -Extracts the element with the index n from the array. - -``` -array_element(array, index) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **index**: Index to extract the element from the array. - -#### Example - -```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ -``` - -#### Aliases - -- array_extract -- list_element -- list_extract - -### `array_empty` - -_Alias of [empty](#empty)._ - -### `array_except` - -Returns an array of the elements that appear in the first array but not in the second. - -``` -array_except(array1, array2) -``` - -#### Arguments - -- **array1**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **array2**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_except([1, 2, 3, 4], [5, 6, 3, 4]); -+----------------------------------------------------+ -| array_except([1, 2, 3, 4], [5, 6, 3, 4]); | -+----------------------------------------------------+ -| [1, 2] | -+----------------------------------------------------+ -> select array_except([1, 2, 3, 4], [3, 4, 5, 6]); -+----------------------------------------------------+ -| array_except([1, 2, 3, 4], [3, 4, 5, 6]); | -+----------------------------------------------------+ -| [1, 2] | -+----------------------------------------------------+ -``` - -#### Aliases - -- list_except - -### `array_extract` - -_Alias of [array_element](#array_element)._ - -### `array_has` - -Returns true if the array contains the element. - -``` -array_has(array, element) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_has([1, 2, 3], 2); -+-----------------------------+ -| array_has(List([1,2,3]), 2) | -+-----------------------------+ -| true | -+-----------------------------+ -``` - -#### Aliases - -- list_has -- array_contains -- list_contains - -### `array_has_all` - -Returns true if the array contains the element. - -``` -array_has(array, element) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_has([1, 2, 3], 2); -+-----------------------------+ -| array_has(List([1,2,3]), 2) | -+-----------------------------+ -| true | -+-----------------------------+ -``` - -#### Aliases - -- list_has_all - -### `array_has_any` - -Returns true if the array contains the element. - -``` -array_has(array, element) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Scalar or Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_has([1, 2, 3], 2); -+-----------------------------+ -| array_has(List([1,2,3]), 2) | -+-----------------------------+ -| true | -+-----------------------------+ -``` - -#### Aliases - -- list_has_any - -### `array_indexof` - -_Alias of [array_position](#array_position)._ - -### `array_intersect` - -Returns distinct values from the array after removing duplicates. - -``` -array_distinct(array) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_distinct([1, 3, 2, 3, 1, 2, 4]); -+---------------------------------+ -| array_distinct(List([1,2,3,4])) | -+---------------------------------+ -| [1, 2, 3, 4] | -+---------------------------------+ -``` - -#### Aliases - -- list_intersect - -### `array_join` - -_Alias of [array_to_string](#array_to_string)._ - -### `array_length` - -Returns the length of the array dimension. - -``` -array_length(array, dimension) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **dimension**: Array dimension. - -#### Example - -```sql -> select array_length([1, 2, 3, 4, 5], 1); -+-------------------------------------------+ -| array_length(List([1,2,3,4,5]), 1) | -+-------------------------------------------+ -| 5 | -+-------------------------------------------+ -``` - -#### Aliases - -- list_length - -### `array_ndims` - -Returns an array of the array's dimensions. - -``` -array_dims(array) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_dims([[1, 2, 3], [4, 5, 6]]); -+---------------------------------+ -| array_dims(List([1,2,3,4,5,6])) | -+---------------------------------+ -| [2, 3] | -+---------------------------------+ -``` - -#### Aliases - -- list_ndims - -### `array_pop_back` - -Extracts the element with the index n from the array. - -``` -array_element(array, index) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **index**: Index to extract the element from the array. - -#### Example - -```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ -``` - -#### Aliases - -- list_pop_back - -### `array_pop_front` - -Extracts the element with the index n from the array. - -``` -array_element(array, index) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **index**: Index to extract the element from the array. - -#### Example - -```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ -``` - -#### Aliases - -- list_pop_front - -### `array_position` - -Returns the position of the first occurrence of the specified element in the array. - -``` -array_position(array, element) -array_position(array, element, index) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Element to search for position in the array. -- **index**: Index at which to start searching. - -#### Example - -```sql -> select array_position([1, 2, 2, 3, 1, 4], 2); -+----------------------------------------------+ -| array_position(List([1,2,2,3,1,4]),Int64(2)) | -+----------------------------------------------+ -| 2 | -+----------------------------------------------+ -> select array_position([1, 2, 2, 3, 1, 4], 2, 3); -+----------------------------------------------------+ -| array_position(List([1,2,2,3,1,4]),Int64(2), Int64(3)) | -+----------------------------------------------------+ -| 3 | -+----------------------------------------------------+ -``` - -#### Aliases - -- list_position -- array_indexof -- list_indexof - -### `array_positions` - -Returns the position of the first occurrence of the specified element in the array. - -``` -array_position(array, element) -array_position(array, element, index) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Element to search for position in the array. -- **index**: Index at which to start searching. - -#### Example - -```sql -> select array_position([1, 2, 2, 3, 1, 4], 2); -+----------------------------------------------+ -| array_position(List([1,2,2,3,1,4]),Int64(2)) | -+----------------------------------------------+ -| 2 | -+----------------------------------------------+ -> select array_position([1, 2, 2, 3, 1, 4], 2, 3); -+----------------------------------------------------+ -| array_position(List([1,2,2,3,1,4]),Int64(2), Int64(3)) | -+----------------------------------------------------+ -| 3 | -+----------------------------------------------------+ -``` - -#### Aliases - -- list_positions - -### `array_prepend` - -Appends an element to the end of an array. - -``` -array_append(array, element) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Element to append to the array. - -#### Example - -```sql -> select array_append([1, 2, 3], 4); -+--------------------------------------+ -| array_append(List([1,2,3]),Int64(4)) | -+--------------------------------------+ -| [1, 2, 3, 4] | -+--------------------------------------+ -``` - -#### Aliases - -- list_prepend -- array_push_front -- list_push_front - -### `array_push_back` - -_Alias of [array_append](#array_append)._ - -### `array_push_front` - -_Alias of [array_prepend](#array_prepend)._ - -### `array_remove` - -Removes the first element from the array equal to the given value. - -``` -array_remove(array, element) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Element to be removed from the array. - -#### Example - -```sql -> select array_remove([1, 2, 2, 3, 2, 1, 4], 2); -+----------------------------------------------+ -| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) | -+----------------------------------------------+ -| [1, 2, 3, 2, 1, 4] | -+----------------------------------------------+ -``` - -#### Aliases - -- list_remove - -### `array_remove_all` - -Removes the first element from the array equal to the given value. - -``` -array_remove(array, element) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Element to be removed from the array. - -#### Example - -```sql -> select array_remove([1, 2, 2, 3, 2, 1, 4], 2); -+----------------------------------------------+ -| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) | -+----------------------------------------------+ -| [1, 2, 3, 2, 1, 4] | -+----------------------------------------------+ -``` - -#### Aliases - -- list_remove_all - -### `array_remove_n` - -Removes the first element from the array equal to the given value. - -``` -array_remove(array, element) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **element**: Element to be removed from the array. - -#### Example - -```sql -> select array_remove([1, 2, 2, 3, 2, 1, 4], 2); -+----------------------------------------------+ -| array_remove(List([1,2,2,3,2,1,4]),Int64(2)) | -+----------------------------------------------+ -| [1, 2, 3, 2, 1, 4] | -+----------------------------------------------+ -``` - -#### Aliases - -- list_remove_n - -### `array_repeat` - -Returns an array containing element `count` times. - -``` -array_repeat(element, count) -``` - -#### Arguments - -- **element**: Element expression. Can be a constant, column, or function, and any combination of array operators. -- **count**: Value of how many times to repeat the element. - -#### Example - -```sql -> select array_repeat(1, 3); -+---------------------------------+ -| array_repeat(Int64(1),Int64(3)) | -+---------------------------------+ -| [1, 1, 1] | -+---------------------------------+ -> select array_repeat([1, 2], 2); -+------------------------------------+ -| array_repeat(List([1,2]),Int64(2)) | -+------------------------------------+ -| [[1, 2], [1, 2]] | -+------------------------------------+ -``` - -#### Aliases - -- list_repeat - -### `array_replace` - -Replaces the first `max` occurrences of the specified element with another specified element. - -``` -array_replace_n(array, from, to, max) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **from**: Initial element. -- **to**: Final element. -- **max**: Number of first occurrences to replace. - -#### Example - -```sql -> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2); -+-------------------------------------------------------------------+ -| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) | -+-------------------------------------------------------------------+ -| [1, 5, 5, 3, 2, 1, 4] | -+-------------------------------------------------------------------+ -``` - -#### Aliases - -- list_replace - -### `array_replace_all` - -Replaces the first `max` occurrences of the specified element with another specified element. - -``` -array_replace_n(array, from, to, max) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **from**: Initial element. -- **to**: Final element. -- **max**: Number of first occurrences to replace. - -#### Example - -```sql -> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2); -+-------------------------------------------------------------------+ -| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) | -+-------------------------------------------------------------------+ -| [1, 5, 5, 3, 2, 1, 4] | -+-------------------------------------------------------------------+ -``` - -#### Aliases - -- list_replace_all - -### `array_replace_n` - -Replaces the first `max` occurrences of the specified element with another specified element. - -``` -array_replace_n(array, from, to, max) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **from**: Initial element. -- **to**: Final element. -- **max**: Number of first occurrences to replace. - -#### Example - -```sql -> select array_replace_n([1, 2, 2, 3, 2, 1, 4], 2, 5, 2); -+-------------------------------------------------------------------+ -| array_replace_n(List([1,2,2,3,2,1,4]),Int64(2),Int64(5),Int64(2)) | -+-------------------------------------------------------------------+ -| [1, 5, 5, 3, 2, 1, 4] | -+-------------------------------------------------------------------+ -``` - -#### Aliases - -- list_replace_n - -### `array_resize` - -Resizes the list to contain size elements. Initializes new elements with value or empty if value is not set. - -``` -array_resize(array, size, value) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **size**: New size of given array. -- **value**: Defines new elements' value or empty if value is not set. - -#### Example - -```sql -> select array_resize([1, 2, 3], 5, 0); -+-------------------------------------+ -| array_resize(List([1,2,3],5,0)) | -+-------------------------------------+ -| [1, 2, 3, 0, 0] | -+-------------------------------------+ -``` - -#### Aliases - -- list_resize - -### `array_reverse` - -Returns the array with the order of the elements reversed. - -``` -array_reverse(array) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_reverse([1, 2, 3, 4]); -+------------------------------------------------------------+ -| array_reverse(List([1, 2, 3, 4])) | -+------------------------------------------------------------+ -| [4, 3, 2, 1] | -+------------------------------------------------------------+ -``` - -#### Aliases - -- list_reverse - -### `array_slice` - -Extracts the element with the index n from the array. - -``` -array_element(array, index) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **index**: Index to extract the element from the array. - -#### Example - -```sql -> select array_element([1, 2, 3, 4], 3); -+-----------------------------------------+ -| array_element(List([1,2,3,4]),Int64(3)) | -+-----------------------------------------+ -| 3 | -+-----------------------------------------+ -``` - -#### Aliases - -- list_slice - -### `array_sort` - -Sort array. - -``` -array_sort(array, desc, nulls_first) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **desc**: Whether to sort in descending order(`ASC` or `DESC`). -- **nulls_first**: Whether to sort nulls first(`NULLS FIRST` or `NULLS LAST`). - -#### Example - -```sql -> select array_sort([3, 1, 2]); -+-----------------------------+ -| array_sort(List([3,1,2])) | -+-----------------------------+ -| [1, 2, 3] | -+-----------------------------+ -``` - -#### Aliases - -- list_sort - -### `array_to_string` - -Converts each element to its text representation. - -``` -array_to_string(array, delimiter) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **delimiter**: Array element separator. - -#### Example - -```sql -> select array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], ','); -+----------------------------------------------------+ -| array_to_string(List([1,2,3,4,5,6,7,8]),Utf8(",")) | -+----------------------------------------------------+ -| 1,2,3,4,5,6,7,8 | -+----------------------------------------------------+ -``` - -#### Aliases - -- list_to_string -- array_join -- list_join - -### `array_union` - -Returns distinct values from the array after removing duplicates. - -``` -array_distinct(array) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select array_distinct([1, 3, 2, 3, 1, 2, 4]); -+---------------------------------+ -| array_distinct(List([1,2,3,4])) | -+---------------------------------+ -| [1, 2, 3, 4] | -+---------------------------------+ -``` - -#### Aliases - -- list_union - -### `cardinality` - -Returns the total number of elements in the array. - -``` -cardinality(array) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select cardinality([[1, 2, 3, 4], [5, 6, 7, 8]]); -+--------------------------------------+ -| cardinality(List([1,2,3,4,5,6,7,8])) | -+--------------------------------------+ -| 8 | -+--------------------------------------+ -``` - -### `empty` - -Returns 1 for an empty array or 0 for a non-empty array. - -``` -empty(array) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select empty([1]); -+------------------+ -| empty(List([1])) | -+------------------+ -| 0 | -+------------------+ -``` - -#### Aliases - -- array_empty -- list_empty - -### `flatten` - -Converts an array of arrays to a flat array. - -- Applies to any depth of nested arrays -- Does not change arrays that are already flat - -The flattened array contains all the elements from all source arrays. - -``` -flatten(array) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. - -#### Example - -```sql -> select flatten([[1, 2], [3, 4]]); -+------------------------------+ -| flatten(List([1,2], [3,4])) | -+------------------------------+ -| [1, 2, 3, 4] | -+------------------------------+ -``` - -### `generate_series` - -Similar to the range function, but it includes the upper bound. - -``` -generate_series(start, stop, step) -``` - -#### Arguments - -- **start**: start of the series. Ints, timestamps, dates or string types that can be coerced to Date32 are supported. -- **end**: end of the series (included). Type must be the same as start. -- **step**: increase by step (can not be 0). Steps less than a day are supported only for timestamp ranges. - -#### Example - -```sql -> select generate_series(1,3); -+------------------------------------+ -| generate_series(Int64(1),Int64(3)) | -+------------------------------------+ -| [1, 2, 3] | -+------------------------------------+ -``` - -### `list_any_value` - -_Alias of [array_any_value](#array_any_value)._ - -### `list_append` - -_Alias of [array_append](#array_append)._ - -### `list_cat` - -_Alias of [array_concat](#array_concat)._ - -### `list_concat` - -_Alias of [array_concat](#array_concat)._ - -### `list_contains` - -_Alias of [array_has](#array_has)._ - -### `list_dims` - -_Alias of [array_dims](#array_dims)._ - -### `list_distance` - -_Alias of [array_distance](#array_distance)._ - -### `list_distinct` - -_Alias of [array_distinct](#array_distinct)._ - -### `list_element` - -_Alias of [array_element](#array_element)._ - -### `list_empty` - -_Alias of [empty](#empty)._ - -### `list_except` - -_Alias of [array_except](#array_except)._ - -### `list_extract` - -_Alias of [array_element](#array_element)._ - -### `list_has` - -_Alias of [array_has](#array_has)._ - -### `list_has_all` - -_Alias of [array_has_all](#array_has_all)._ - -### `list_has_any` - -_Alias of [array_has_any](#array_has_any)._ - -### `list_indexof` - -_Alias of [array_position](#array_position)._ - -### `list_intersect` - -_Alias of [array_intersect](#array_intersect)._ - -### `list_join` - -_Alias of [array_to_string](#array_to_string)._ - -### `list_length` - -_Alias of [array_length](#array_length)._ - -### `list_ndims` - -_Alias of [array_ndims](#array_ndims)._ - -### `list_pop_back` - -_Alias of [array_pop_back](#array_pop_back)._ - -### `list_pop_front` - -_Alias of [array_pop_front](#array_pop_front)._ - -### `list_position` - -_Alias of [array_position](#array_position)._ - -### `list_positions` - -_Alias of [array_positions](#array_positions)._ - -### `list_prepend` - -_Alias of [array_prepend](#array_prepend)._ - -### `list_push_back` - -_Alias of [array_append](#array_append)._ - -### `list_push_front` - -_Alias of [array_prepend](#array_prepend)._ - -### `list_remove` - -_Alias of [array_remove](#array_remove)._ - -### `list_remove_all` - -_Alias of [array_remove_all](#array_remove_all)._ - -### `list_remove_n` - -_Alias of [array_remove_n](#array_remove_n)._ - -### `list_repeat` - -_Alias of [array_repeat](#array_repeat)._ - -### `list_replace` - -_Alias of [array_replace](#array_replace)._ - -### `list_replace_all` - -_Alias of [array_replace_all](#array_replace_all)._ - -### `list_replace_n` - -_Alias of [array_replace_n](#array_replace_n)._ - -### `list_resize` - -_Alias of [array_resize](#array_resize)._ - -### `list_reverse` - -_Alias of [array_reverse](#array_reverse)._ - -### `list_slice` - -_Alias of [array_slice](#array_slice)._ - -### `list_sort` - -_Alias of [array_sort](#array_sort)._ - -### `list_to_string` - -_Alias of [array_to_string](#array_to_string)._ - -### `list_union` - -_Alias of [array_union](#array_union)._ - -### `make_array` - -Returns an array using the specified input expressions. - -``` -make_array(expression1[, ..., expression_n]) -``` - -#### Arguments - -- **expression_n**: Expression to include in the output array. Can be a constant, column, or function, and any combination of arithmetic or string operators. - -#### Example - -```sql -> select make_array(1, 2, 3, 4, 5); -+----------------------------------------------------------+ -| make_array(Int64(1),Int64(2),Int64(3),Int64(4),Int64(5)) | -+----------------------------------------------------------+ -| [1, 2, 3, 4, 5] | -+----------------------------------------------------------+ -``` - -#### Aliases - -- make_list - -### `make_list` - -_Alias of [make_array](#make_array)._ - -### `range` - -Returns an Arrow array between start and stop with step. The range start..end contains all values with start <= x < end. It is empty if start >= end. Step cannot be 0. - -``` -range(start, stop, step) -``` - -#### Arguments - -- **start**: Start of the range. Ints, timestamps, dates or string types that can be coerced to Date32 are supported. -- **end**: End of the range (not included). Type must be the same as start. -- **step**: Increase by step (cannot be 0). Steps less than a day are supported only for timestamp ranges. - -#### Example - -```sql -> select range(2, 10, 3); -+-----------------------------------+ -| range(Int64(2),Int64(10),Int64(3))| -+-----------------------------------+ -| [2, 5, 8] | -+-----------------------------------+ - -> select range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH); -+--------------------------------------------------------------+ -| range(DATE '1992-09-01', DATE '1993-03-01', INTERVAL '1' MONTH) | -+--------------------------------------------------------------+ -| [1992-09-01, 1992-10-01, 1992-11-01, 1992-12-01, 1993-01-01, 1993-02-01] | -+--------------------------------------------------------------+ -``` - -### `string_to_array` - -Converts each element to its text representation. - -``` -array_to_string(array, delimiter) -``` - -#### Arguments - -- **array**: Array expression. Can be a constant, column, or function, and any combination of array operators. -- **delimiter**: Array element separator. - -#### Example - -```sql -> select array_to_string([[1, 2, 3, 4], [5, 6, 7, 8]], ','); -+----------------------------------------------------+ -| array_to_string(List([1,2,3,4,5,6,7,8]),Utf8(",")) | -+----------------------------------------------------+ -| 1,2,3,4,5,6,7,8 | -+----------------------------------------------------+ -``` - -#### Aliases - -- string_to_list - -### `string_to_list` - -_Alias of [string_to_array](#string_to_array)._ - -## Struct Functions - -- [named_struct](#named_struct) -- [row](#row) -- [struct](#struct) - -### `named_struct` - -Returns an Arrow struct using the specified name and input expressions pairs. - -``` -named_struct(expression1_name, expression1_input[, ..., expression_n_name, expression_n_input]) -``` - -#### Arguments - -- **expression_n_name**: Name of the column field. Must be a constant string. -- **expression_n_input**: Expression to include in the output struct. Can be a constant, column, or function, and any combination of arithmetic or string operators. - -#### Example - -For example, this query converts two columns `a` and `b` to a single column with -a struct type of fields `field_a` and `field_b`: - -```sql -> select * from t; -+---+---+ -| a | b | -+---+---+ -| 1 | 2 | -| 3 | 4 | -+---+---+ -> select named_struct('field_a', a, 'field_b', b) from t; -+-------------------------------------------------------+ -| named_struct(Utf8("field_a"),t.a,Utf8("field_b"),t.b) | -+-------------------------------------------------------+ -| {field_a: 1, field_b: 2} | -| {field_a: 3, field_b: 4} | -+-------------------------------------------------------+ -``` - -### `row` - -_Alias of [struct](#struct)._ - -### `struct` - -Returns an Arrow struct using the specified input expressions optionally named. -Fields in the returned struct use the optional name or the `cN` naming convention. -For example: `c0`, `c1`, `c2`, etc. - -``` -struct(expression1[, ..., expression_n]) -``` - -#### Arguments - -- **expression1, expression_n**: Expression to include in the output struct. Can be a constant, column, or function, any combination of arithmetic or string operators. - -#### Example - -For example, this query converts two columns `a` and `b` to a single column with -a struct type of fields `field_a` and `c1`: - -```sql -> select * from t; -+---+---+ -| a | b | -+---+---+ -| 1 | 2 | -| 3 | 4 | -+---+---+ - --- use default names `c0`, `c1` -> select struct(a, b) from t; -+-----------------+ -| struct(t.a,t.b) | -+-----------------+ -| {c0: 1, c1: 2} | -| {c0: 3, c1: 4} | -+-----------------+ - --- name the first field `field_a` -select struct(a as field_a, b) from t; -+--------------------------------------------------+ -| named_struct(Utf8("field_a"),t.a,Utf8("c1"),t.b) | -+--------------------------------------------------+ -| {field_a: 1, c1: 2} | -| {field_a: 3, c1: 4} | -+--------------------------------------------------+ -``` - -#### Aliases - -- row - -## Map Functions - -- [element_at](#element_at) -- [map](#map) -- [map_extract](#map_extract) -- [map_keys](#map_keys) -- [map_values](#map_values) - -### `element_at` - -_Alias of [map_extract](#map_extract)._ - -### `map` - -Returns an Arrow map with the specified key-value pairs. - -The `make_map` function creates a map from two lists: one for keys and one for values. Each key must be unique and non-null. - -``` -map(key, value) -map(key: value) -make_map(['key1', 'key2'], ['value1', 'value2']) -``` - -#### Arguments - -- **key**: For `map`: Expression to be used for key. Can be a constant, column, function, or any combination of arithmetic or string operators. - For `make_map`: The list of keys to be used in the map. Each key must be unique and non-null. -- **value**: For `map`: Expression to be used for value. Can be a constant, column, function, or any combination of arithmetic or string operators. - For `make_map`: The list of values to be mapped to the corresponding keys. - -#### Example - -````sql - -- Using map function - SELECT MAP('type', 'test'); - ---- - {type: test} - - SELECT MAP(['POST', 'HEAD', 'PATCH'], [41, 33, null]); - ---- - {POST: 41, HEAD: 33, PATCH: } - - SELECT MAP([[1,2], [3,4]], ['a', 'b']); - ---- - {[1, 2]: a, [3, 4]: b} - - SELECT MAP { 'a': 1, 'b': 2 }; - ---- - {a: 1, b: 2} - - -- Using make_map function - SELECT MAKE_MAP(['POST', 'HEAD'], [41, 33]); - ---- - {POST: 41, HEAD: 33} - - SELECT MAKE_MAP(['key1', 'key2'], ['value1', null]); - ---- - {key1: value1, key2: } - ``` - - -### `map_extract` - -Returns a list containing the value for the given key or an empty list if the key is not present in the map. - -```` - -map_extract(map, key) - -```` -#### Arguments - -- **map**: Map expression. Can be a constant, column, or function, and any combination of map operators. -- **key**: Key to extract from the map. Can be a constant, column, or function, any combination of arithmetic or string operators, or a named expression of the previously listed. - -#### Example - -```sql -SELECT map_extract(MAP {'a': 1, 'b': NULL, 'c': 3}, 'a'); ----- -[1] - -SELECT map_extract(MAP {1: 'one', 2: 'two'}, 2); ----- -['two'] - -SELECT map_extract(MAP {'x': 10, 'y': NULL, 'z': 30}, 'y'); ----- -[] -```` - -#### Aliases - -- element_at - -### `map_keys` - -Returns a list of all keys in the map. - -``` -map_keys(map) -``` - -#### Arguments - -- **map**: Map expression. Can be a constant, column, or function, and any combination of map operators. - -#### Example - -```sql -SELECT map_keys(MAP {'a': 1, 'b': NULL, 'c': 3}); ----- -[a, b, c] - -SELECT map_keys(map([100, 5], [42, 43])); ----- -[100, 5] -``` - -### `map_values` - -Returns a list of all values in the map. - -``` -map_values(map) -``` - -#### Arguments - -- **map**: Map expression. Can be a constant, column, or function, and any combination of map operators. - -#### Example - -```sql -SELECT map_values(MAP {'a': 1, 'b': NULL, 'c': 3}); ----- -[1, , 3] - -SELECT map_values(map([100, 5], [42, 43])); ----- -[42, 43] -``` - -## Hashing Functions - -- [digest](#digest) -- [md5](#md5) -- [sha224](#sha224) -- [sha256](#sha256) -- [sha384](#sha384) -- [sha512](#sha512) - -### `digest` - -Computes the binary hash of an expression using the specified algorithm. - -``` -digest(expression, algorithm) -``` - -#### Arguments - -- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. -- **algorithm**: String expression specifying algorithm to use. Must be one of: -- md5 -- sha224 -- sha256 -- sha384 -- sha512 -- blake2s -- blake2b -- blake3 - -#### Example - -```sql -> select digest('foo', 'sha256'); -+------------------------------------------+ -| digest(Utf8("foo"), Utf8("sha256")) | -+------------------------------------------+ -| | -+------------------------------------------+ -``` - -### `md5` - -Computes an MD5 128-bit checksum for a string expression. - -``` -md5(expression) -``` - -#### Arguments - -- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select md5('foo'); -+-------------------------------------+ -| md5(Utf8("foo")) | -+-------------------------------------+ -| | -+-------------------------------------+ -``` - -### `sha224` - -Computes the SHA-224 hash of a binary string. - -``` -sha224(expression) -``` - -#### Arguments - -- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select sha224('foo'); -+------------------------------------------+ -| sha224(Utf8("foo")) | -+------------------------------------------+ -| | -+------------------------------------------+ -``` - -### `sha256` - -Computes the SHA-256 hash of a binary string. - -``` -sha256(expression) -``` - -#### Arguments - -- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select sha256('foo'); -+--------------------------------------+ -| sha256(Utf8("foo")) | -+--------------------------------------+ -| | -+--------------------------------------+ -``` - -### `sha384` - -Computes the SHA-384 hash of a binary string. - -``` -sha384(expression) -``` - -#### Arguments - -- **expression**: String expression to operate on. Can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select sha384('foo'); -+-----------------------------------------+ -| sha384(Utf8("foo")) | -+-----------------------------------------+ -| | -+-----------------------------------------+ -``` - -### `sha512` - -Computes the SHA-512 hash of a binary string. - -``` -sha512(expression) -``` - -#### Arguments - -- **expression**: String - -#### Example - -```sql -> select sha512('foo'); -+-------------------------------------------+ -| sha512(Utf8("foo")) | -+-------------------------------------------+ -| | -+-------------------------------------------+ -``` - -## Other Functions - -- [arrow_cast](#arrow_cast) -- [arrow_typeof](#arrow_typeof) -- [get_field](#get_field) -- [version](#version) - -### `arrow_cast` - -Casts a value to a specific Arrow data type. - -``` -arrow_cast(expression, datatype) -``` - -#### Arguments - -- **expression**: Expression to cast. The expression can be a constant, column, or function, and any combination of operators. -- **datatype**: [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) name to cast to, as a string. The format is the same as that returned by [`arrow_typeof`] - -#### Example - -```sql -> select arrow_cast(-5, 'Int8') as a, - arrow_cast('foo', 'Dictionary(Int32, Utf8)') as b, - arrow_cast('bar', 'LargeUtf8') as c, - arrow_cast('2023-01-02T12:53:02', 'Timestamp(Microsecond, Some("+08:00"))') as d - ; -+----+-----+-----+---------------------------+ -| a | b | c | d | -+----+-----+-----+---------------------------+ -| -5 | foo | bar | 2023-01-02T12:53:02+08:00 | -+----+-----+-----+---------------------------+ -``` - -### `arrow_typeof` - -Returns the name of the underlying [Arrow data type](https://docs.rs/arrow/latest/arrow/datatypes/enum.DataType.html) of the expression. - -``` -arrow_typeof(expression) -``` - -#### Arguments - -- **expression**: Expression to evaluate. The expression can be a constant, column, or function, and any combination of operators. - -#### Example - -```sql -> select arrow_typeof('foo'), arrow_typeof(1); -+---------------------------+------------------------+ -| arrow_typeof(Utf8("foo")) | arrow_typeof(Int64(1)) | -+---------------------------+------------------------+ -| Utf8 | Int64 | -+---------------------------+------------------------+ -``` - -### `get_field` - -Returns a field within a map or a struct with the given key. -Note: most users invoke `get_field` indirectly via field access -syntax such as `my_struct_col['field_name']` which results in a call to -`get_field(my_struct_col, 'field_name')`. - -``` -get_field(expression1, expression2) -``` - -#### Arguments - -- **expression1**: The map or struct to retrieve a field for. -- **expression2**: The field name in the map or struct to retrieve data for. Must evaluate to a string. - -#### Example - -```sql -> create table t (idx varchar, v varchar) as values ('data','fusion'), ('apache', 'arrow'); -> select struct(idx, v) from t as c; -+-------------------------+ -| struct(c.idx,c.v) | -+-------------------------+ -| {c0: data, c1: fusion} | -| {c0: apache, c1: arrow} | -+-------------------------+ -> select get_field((select struct(idx, v) from t), 'c0'); -+-----------------------+ -| struct(t.idx,t.v)[c0] | -+-----------------------+ -| data | -| apache | -+-----------------------+ -> select get_field((select struct(idx, v) from t), 'c1'); -+-----------------------+ -| struct(t.idx,t.v)[c1] | -+-----------------------+ -| fusion | -| arrow | -+-----------------------+ -``` - -### `version` - -Returns the version of DataFusion. - -``` -version() -``` - -#### Example - -```sql -> select version(); -+--------------------------------------------+ -| version() | -+--------------------------------------------+ -| Apache DataFusion 42.0.0, aarch64 on macos | -+--------------------------------------------+ -```