diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 0000000000..e69de29bb2 diff --git a/404.html b/404.html new file mode 100644 index 0000000000..5506e16c42 --- /dev/null +++ b/404.html @@ -0,0 +1,901 @@ + + + +
+ + + + + + + + + + + + + + +narwhals.DataFrame
Two-dimensional data structure representing data as a table with rows and columns.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
df |
+
+ Any
+ |
+
+
+
+ A pandas-like dataframe (Pandas, cuDF or Modin), a Polars dataframe, + a narwhals DataFrame or a narwhals LazyFrame. + |
+ + required + | +
is_polars |
+
+ bool
+ |
+
+
+
+ if set to `True`, assume the dataframe to be of Polars type. |
+
+ False
+ |
+
Examples:
+Constructing a DataFrame from a dictionary:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> df_pl = pl.DataFrame(data)
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(df)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
columns: list[str]
+
+
+ property
+
+
+Get column names.
+ + + +Examples:
+Get column names.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df.columns
+['foo', 'bar', 'ham']
+
schema: dict[str, DType]
+
+
+ property
+
+
+Get a dict[column name, DataType].
+ + + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df.schema
+OrderedDict({'foo': Int64, 'bar': Float64, 'ham': String})
+
shape: tuple[int, int]
+
+
+ property
+
+
+Get the shape of the DataFrame.
+ + + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame({"foo": [1, 2, 3, 4, 5]})
+>>> df = nw.DataFrame(df_pl)
+>>> df.shape
+(5, 1)
+
drop(*columns)
+
+Remove columns from the dataframe.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*columns |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the dataframe. + |
+
+ ()
+ |
+
Examples:
+Drop a single column by passing the name of that column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> dframe = df.drop("ham")
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+
Drop multiple columns by passing a list of column names.
+>>> dframe = df.drop(["bar", "ham"])
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Use positional arguments to drop multiple columns.
+>>> dframe = df.drop("foo", "ham")
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+
filter(*predicates)
+
+Filter the rows in the DataFrame based on one or more predicate expressions.
+The original order of the remaining rows is preserved.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
predicates |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Expression(s) that evaluate to a boolean Series. + |
+
+ ()
+ |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+
Filter on one condition:
+>>> dframe = df.filter(nw.col("foo") > 1)
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Filter on multiple conditions, combined with and/or operators:
+>>> dframe = df.filter((nw.col("foo") < 3) & (nw.col("ham") == "a"))
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
>>> dframe = df.filter((nw.col("foo") == 1) | (nw.col("ham") == "c"))
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Provide multiple filters using *args
syntax:
>>> dframe = df.filter(
+... nw.col("foo") <= 2,
+... ~nw.col("ham").is_in(["b", "c"]),
+... )
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
group_by(*keys)
+
+Start a group by operation.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*keys |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts multiple column names as a list. + |
+
+ ()
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
GroupBy |
+ GroupBy
+ |
+
+
+
+ Object which can be used to perform aggregations. + |
+
Examples:
+Group by one column and call agg
to compute the grouped sum of another
+ column.
>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> dframe = df.group_by("a").agg(nw.col("b").sum()).sort("a")
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+
Group by multiple columns by passing a list of column names.
+>>> dframe = df.group_by(["a", "b"]).agg(nw.max("c")).sort("a", "b")
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
head(n)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of rows to return. If a negative value is passed, return all rows
+except the last `abs(n)`. |
+ + required + | +
Examples:
+Get the first 3 rows.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3, 4, 5],
+... "bar": [6, 7, 8, 9, 10],
+... "ham": ["a", "b", "c", "d", "e"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> dframe = df.head(3)
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Pass a negative value to get all rows except
the last abs(n)
.
>>> dframe = df.head(-3)
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+└─────┴─────┴─────┘
+
join(other, *, how='inner', left_on, right_on)
+
+Join in SQL-like fashion.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
other |
+
+ Self
+ |
+
+
+
+ DataFrame to join with. + |
+ + required + | +
how |
+
+ Literal['inner']
+ |
+
+
+
+ {'inner'} + Join strategy. +
|
+
+ 'inner'
+ |
+
left_on |
+
+ str | list[str]
+ |
+
+
+
+ Name(s) of the left join column(s). + |
+ + required + | +
right_on |
+
+ str | list[str]
+ |
+
+
+
+ Name(s) of the right join column(s). + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined DataFrame + |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> other_df_pl = pl.DataFrame(
+... {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> other_df = nw.DataFrame(other_df_pl)
+>>> dframe = df.join(other_df, left_on="ham", right_on="ham")
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+
rename(mapping)
+
+Rename column names.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
mapping |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name. + |
+ + required + | +
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> dframe = df.rename({"foo": "apple"})
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+
select(*exprs, **named_exprs)
+
+Select columns from this DataFrame.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, + other non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
**named_exprs |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Examples:
+Pass the name of a column to select that column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> dframe = df.select("foo")
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Multiple columns can be selected by passing a list of column names.
+>>> dframe = df.select(["foo", "bar"])
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> dframe = df.select(nw.col("foo"), nw.col("bar") + 1)
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
Use keyword arguments to easily name your expression inputs.
+>>> dframe = df.select(threshold=nw.col('foo')*2)
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+
sort(by, *more_by, descending=False)
+
+Sort the dataframe by the given columns.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
by |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column name(s) to sort by. + |
+ + required + | +
*more_by |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional + arguments. + |
+
+ ()
+ |
+
descending |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple + columns, can be specified per column by passing a + sequence of booleans. + |
+
+ False
+ |
+
Examples:
+Pass a single column name to sort by that column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> dframe = df.sort("a")
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ null ┆ 4.0 ┆ b │
+│ 1 ┆ 6.0 ┆ a │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
Sort by multiple columns by passing a list of columns.
+>>> dframe = df.sort(["c", "a"], descending=True)
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 2 ┆ 5.0 ┆ c │
+│ null ┆ 4.0 ┆ b │
+│ 1 ┆ 6.0 ┆ a │
+└──────┴─────┴─────┘
+
Or use positional arguments to sort by multiple columns in the same way.
+>>> dframe = df.sort("c", "a", descending=[False, True])
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
to_dict(*, as_series=True)
+
+Convert DataFrame to a dictionary mapping column name to values.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
as_series |
+
+ bool
+ |
+
+
+
+ If set to `True`, the values are Series; otherwise they are lists of Python values. |
+
+ True
+ |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "A": [1, 2, 3, 4, 5],
+... "fruits": ["banana", "banana", "apple", "apple", "banana"],
+... "B": [5, 4, 3, 2, 1],
+... "cars": ["beetle", "audi", "beetle", "beetle", "beetle"],
+... "optional": [28, 300, None, 2, -30],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(df)
+shape: (5, 5)
+┌─────┬────────┬─────┬────────┬──────────┐
+│ A ┆ fruits ┆ B ┆ cars ┆ optional │
+│ --- ┆ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ i64 ┆ str ┆ i64 │
+╞═════╪════════╪═════╪════════╪══════════╡
+│ 1 ┆ banana ┆ 5 ┆ beetle ┆ 28 │
+│ 2 ┆ banana ┆ 4 ┆ audi ┆ 300 │
+│ 3 ┆ apple ┆ 3 ┆ beetle ┆ null │
+│ 4 ┆ apple ┆ 2 ┆ beetle ┆ 2 │
+│ 5 ┆ banana ┆ 1 ┆ beetle ┆ -30 │
+└─────┴────────┴─────┴────────┴──────────┘
+>>> df.to_dict(as_series=False)
+{'A': [1, 2, 3, 4, 5], 'fruits': ['banana', 'banana', 'apple', 'apple', 'banana'], 'B': [5, 4, 3, 2, 1], 'cars': ['beetle', 'audi', 'beetle', 'beetle', 'beetle'], 'optional': [28, 300, None, 2, -30]}
+>>> df.to_dict(as_series=True)
+{'A': shape: (5,)
+Series: 'A' [i64]
+[
+ 1
+ 2
+ 3
+ 4
+ 5
+], 'fruits': shape: (5,)
+Series: 'fruits' [str]
+[
+ "banana"
+ "banana"
+ "apple"
+ "apple"
+ "banana"
+], 'B': shape: (5,)
+Series: 'B' [i64]
+[
+ 5
+ 4
+ 3
+ 2
+ 1
+], 'cars': shape: (5,)
+Series: 'cars' [str]
+[
+ "beetle"
+ "audi"
+ "beetle"
+ "beetle"
+ "beetle"
+], 'optional': shape: (5,)
+Series: 'optional' [i64]
+[
+ 28
+ 300
+ null
+ 2
+ -30
+]}
+
to_numpy()
+
+Convert this DataFrame to a NumPy ndarray.
+ + + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ A NumPy ndarray. + |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.5, 7.0, 8.5],
+... "ham": ["a", "b", "c"],
+... },
+... schema_overrides={"foo": pl.UInt8, "bar": pl.Float32},
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+
Export to a standard 2D numpy array.
+>>> df.to_numpy()
+array([[1, 6.5, 'a'],
+ [2, 7.0, 'b'],
+ [3, 8.5, 'c']], dtype=object)
+
to_pandas()
+
+Convert this DataFrame to a pandas DataFrame.
+ + + +Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ A pandas DataFrame. + |
+
This operation requires that pandas
is installed.
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> df.to_pandas()
+ foo bar ham
+0 1 6.0 a
+1 2 7.0 b
+2 3 8.0 c
+
Null values in numeric columns are converted to NaN
.
>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, None],
+... "bar": [6.0, None, 8.0],
+... "ham": [None, "b", "c"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> df.to_pandas()
+ foo bar ham
+0 1.0 6.0 None
+1 2.0 NaN b
+2 NaN 8.0 c
+
unique(subset)
+
+Drop duplicate rows from this dataframe.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
subset |
+
+ str | list[str]
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows. + |
+ + required + | +
Returns:
+Name | Type | +Description | +
---|---|---|
DataFrame |
+ Self
+ |
+
+
+
+ DataFrame with unique rows. + |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> dframe = df.unique(["bar", "ham"])
+>>> nw.to_native(dframe)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+>>> dframe = df.unique("foo").sort("foo")
+>>> nw.to_native(dframe)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+│ 2 ┆ a ┆ b │
+│ 3 ┆ a ┆ b │
+└─────┴─────┴─────┘
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this DataFrame.
+Added columns will replace existing columns with the same name.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
**named_exprs |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
DataFrame |
+ Self
+ |
+
+
+
+ A new DataFrame with the columns added. + |
+
Creating a new DataFrame using this method does not create a new copy of +existing data.
+Examples:
+Pass an expression to add it as a new column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pl = pl.DataFrame(
+... {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+... )
+>>> df = nw.DataFrame(df_pl)
+>>> dframe = df.with_columns((nw.col("a") * 2).alias("a*2"))
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ a*2 │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+
narwhals.dependencies
get_pandas()
+
+Import pandas (if available - else return None).
+ +get_polars()
+
+Import Polars (if available - else return None).
+ +get_modin()
+
+get_cudf()
+
+get_pyarrow()
+
+narwhals.dtypes
Int64
+
+
+Int32
+
+
+Int16
+
+
+Int8
+
+
+UInt64
+
+
+UInt32
+
+
+UInt16
+
+
+UInt8
+
+
+Float64
+
+
+Float32
+
+
+Boolean
+
+
+String
+
+
+Datetime
+
+
+narwhals.Expr
alias(name)
+
+Rename the expression.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
name |
+
+ str
+ |
+
+
+
+ The new name. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [1, 2], 'b': [4, 5]})
+>>> df_pl = pl.DataFrame({'a': [1, 2], 'b': [4, 5]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select((nw.col('b')+10).alias('c'))
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ c
+0 14
+1 15
+>>> func(df_pl)
+shape: (2, 1)
+┌─────┐
+│ c │
+│ --- │
+│ i64 │
+╞═════╡
+│ 14 │
+│ 15 │
+└─────┘
+
all()
+
+Return whether all values in the column are True
.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [True, False], 'b': [True, True]})
+>>> df_pl = pl.DataFrame({'a': [True, False], 'b': [True, True]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.col('a', 'b').all())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 False True
+>>> func(df_pl)
+shape: (1, 2)
+┌───────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞═══════╪══════╡
+│ false ┆ true │
+└───────┴──────┘
+
any()
+
+Return whether any of the values in the column are True.
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [True, False], 'b': [True, True]})
+>>> df_pl = pl.DataFrame({'a': [True, False], 'b': [True, True]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.col('a', 'b').any())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 True True
+>>> func(df_pl)
+shape: (1, 2)
+┌──────┬──────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ bool ┆ bool │
+╞══════╪══════╡
+│ true ┆ true │
+└──────┴──────┘
+
cast(dtype)
+
+diff()
+
+Returns the difference between each element and the previous one.
+ +pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to calculate
+the diff and fill missing values with 0
in an Int64 column, you could
+do:
nw.col('a').diff().fill_null(0).cast(nw.Int64)
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [1, 1, 3, 5, 5]})
+>>> df_pl = pl.DataFrame({'a': [1, 1, 3, 5, 5]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(a_diff=nw.col('a').diff())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a_diff
+0 NaN
+1 0.0
+2 2.0
+3 2.0
+4 0.0
+>>> func(df_pl)
+shape: (5, 1)
+┌────────┐
+│ a_diff │
+│ --- │
+│ i64 │
+╞════════╡
+│ null │
+│ 0 │
+│ 2 │
+│ 2 │
+│ 0 │
+└────────┘
+
drop_nulls()
+
+filter(other)
+
+is_between(lower_bound, upper_bound, closed='both')
+
+is_in(other)
+
+is_null()
+
+Returns a boolean Series indicating which values are null.
+ + + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame(
+... {
+... 'a': [2, 4, None, 3, 5],
+... 'b': [2.0, 4.0, float("nan"), 3.0, 5.0]
+... }
+... )
+>>> df_pl = pl.DataFrame(
+... {
+... 'a': [2, 4, None, 3, 5],
+... 'b': [2.0, 4.0, float("nan"), 3.0, 5.0]
+... }
+... )
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.with_columns(
+... a_is_null = nw.col('a').is_null(),
+... b_is_null = nw.col('b').is_null()
+... )
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b a_is_null b_is_null
+0 2.0 2.0 False False
+1 4.0 4.0 False False
+2 NaN NaN True True
+3 3.0 3.0 False False
+4 5.0 5.0 False False
+
>>> func(df_pl) # nan != null for polars
+shape: (5, 4)
+┌──────┬─────┬───────────┬───────────┐
+│ a ┆ b ┆ a_is_null ┆ b_is_null │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ bool │
+╞══════╪═════╪═══════════╪═══════════╡
+│ 2 ┆ 2.0 ┆ false ┆ false │
+│ 4 ┆ 4.0 ┆ false ┆ false │
+│ null ┆ NaN ┆ true ┆ false │
+│ 3 ┆ 3.0 ┆ false ┆ false │
+│ 5 ┆ 5.0 ┆ false ┆ false │
+└──────┴─────┴───────────┴───────────┘
+
max()
+
+Returns the maximum value(s) from the column(s).
+ + + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [10, 20], 'b': [50, 100]})
+>>> df_pl = pl.DataFrame({'a': [10, 20], 'b': [50, 100]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.max('a', 'b'))
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 20 100
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 20 ┆ 100 │
+└─────┴─────┘
+
mean()
+
+Get mean value.
+ + + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [-1, 0, 1], 'b': [2, 4, 6]})
+>>> df_pl = pl.DataFrame({'a': [-1, 0, 1], 'b': [2, 4, 6]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.col('a', 'b').mean())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 0.0 4.0
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞═════╪═════╡
+│ 0.0 ┆ 4.0 │
+└─────┴─────┘
+
min()
+
+Returns the minimum value(s) from the column(s).
+ + + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [1, 2], 'b': [4, 3]})
+>>> df_pl = pl.DataFrame({'a': [1, 2], 'b': [4, 3]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.min('a','b'))
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 1 3
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+└─────┴─────┘
+
n_unique()
+
+Returns the count of unique values.
+ + + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [1, 2, 3, 4, 5], 'b': [1, 1, 3, 3, 5]})
+>>> df_pl = pl.DataFrame({'a': [1, 2, 3, 4, 5], 'b': [1, 1, 3, 3, 5]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.col('a', 'b').n_unique())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 5 3
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ u32 ┆ u32 │
+╞═════╪═════╡
+│ 5 ┆ 3 │
+└─────┴─────┘
+
over(*keys)
+
+Compute expressions over the given groups.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
keys |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of columns to compute window expression over.
+ Must be names of columns, as opposed to expressions -
+ so, this is a bit less flexible than Polars' `Expr.over`. |
+
+ ()
+ |
+
Examples:
+>>> import narwhals as nw
+>>> import pandas as pd
+>>> import polars as pl
+>>> data = {'a': [1, 2, 3], 'b': [1, 1, 2]}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.with_columns(
+... a_min_per_group = nw.col('a').min().over('b')
+... )
+... return nw.to_native(df)
+
We can then pass either pandas or Polars:
+>>> func(df_pd)
+ a b a_min_per_group
+0 1 1 1
+1 2 1 1
+2 3 2 3
+>>> func(df_pl)
+shape: (3, 3)
+┌─────┬─────┬─────────────────┐
+│ a ┆ b ┆ a_min_per_group │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ i64 │
+╞═════╪═════╪═════════════════╡
+│ 1 ┆ 1 ┆ 1 │
+│ 2 ┆ 1 ┆ 1 │
+│ 3 ┆ 2 ┆ 3 │
+└─────┴─────┴─────────────────┘
+
unique()
+
+Returns unique values
+ + + +Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [1, 1, 3, 5, 5], 'b': [2, 4, 4, 6, 6]})
+>>> df_pl = pl.DataFrame({'a': [1, 1, 3, 5, 5], 'b': [2, 4, 4, 6, 6]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.col('a', 'b').unique())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 1 2
+1 3 4
+2 5 6
+>>> func(df_pl)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 2 │
+│ 3 ┆ 4 │
+│ 5 ┆ 6 │
+└─────┴─────┘
+
sample(n=None, fraction=None, *, with_replacement=False)
+
+sort(*, descending=False)
+
+std(*, ddof=1)
+
+Get standard deviation.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
ddof |
+
+ int
+ |
+
+
+
+ “Delta Degrees of Freedom”: the divisor used in the calculation is N - ddof, + where N represents the number of elements. By default ddof is 1. + |
+
+ 1
+ |
+
Examples:
+>>> import polars as pl
+>>> import pandas as pd
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [20, 25, 60], 'b': [1.5, 1, -1.4]})
+>>> df_pl = pl.DataFrame({'a': [20, 25, 60], 'b': [1.5, 1, -1.4]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.col('a', 'b').std(ddof=0))
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 17.79513 1.265789
+>>> func(df_pl)
+shape: (1, 2)
+┌──────────┬──────────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ f64 ┆ f64 │
+╞══════════╪══════════╡
+│ 17.79513 ┆ 1.265789 │
+└──────────┴──────────┘
+
sum()
+
+Return the sum value.
+ + + +Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': [5, 10], 'b': [50, 100]})
+>>> df_pl = pl.DataFrame({'a': [5, 10], 'b': [50, 100]})
+
Let's define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.col('a', 'b').sum())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a b
+0 15 150
+>>> func(df_pl)
+shape: (1, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 15 ┆ 150 │
+└─────┴─────┘
+
narwhals.Expr.str
ends_with(suffix)
+
+head(n=5)
+
+Take the first n elements of each string.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of elements to take. + |
+
+ 5
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {'lyrics': ['Atatata', 'taata', 'taatatata', 'zukkyun']}
+>>> df_pd = pd.DataFrame(data)
+>>> df_pl = pl.DataFrame(data)
+
We define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.with_columns(lyrics_head = nw.col('lyrics').str.head())
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ lyrics lyrics_head
+0 Atatata Atata
+1 taata taata
+2 taatatata taata
+3 zukkyun zukky
+>>> func(df_pl)
+shape: (4, 2)
+┌───────────┬─────────────┐
+│ lyrics ┆ lyrics_head │
+│ --- ┆ --- │
+│ str ┆ str │
+╞═══════════╪═════════════╡
+│ Atatata ┆ Atata │
+│ taata ┆ taata │
+│ taatatata ┆ taata │
+│ zukkyun ┆ zukky │
+└───────────┴─────────────┘
+
to_datetime(format)
+
+Convert to Datetime dtype.
+ +pandas defaults to a nanosecond time unit, Polars to microsecond. +Prior to pandas 2.0, nanoseconds were the only time unit supported +in pandas, with no ability to set any other one. Support for +setting the time unit in pandas, where the version permits, will be added in a future release.
+Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
format |
+
+ str
+ |
+
+
+
+ Format to parse strings with. Must be passed, as different + dataframe libraries have different ways of auto-inferring + formats. + |
+ + required + | +
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> df_pd = pd.DataFrame({'a': ['2020-01-01', '2020-01-02']})
+>>> df_pl = pl.DataFrame({'a': ['2020-01-01', '2020-01-02']})
+
We define a dataframe-agnostic function:
+>>> def func(df_any):
+... df = nw.from_native(df_any)
+... df = df.select(nw.col('a').str.to_datetime(format='%Y-%m-%d'))
+... return nw.to_native(df)
+
We can then pass either pandas or Polars to func
:
>>> func(df_pd)
+ a
+0 2020-01-01
+1 2020-01-02
+>>> func(df_pl)
+shape: (2, 1)
+┌─────────────────────┐
+│ a │
+│ --- │
+│ datetime[μs] │
+╞═════════════════════╡
+│ 2020-01-01 00:00:00 │
+│ 2020-01-02 00:00:00 │
+└─────────────────────┘
+
Anything documented in the API reference is intended to work consistently among +supported backends.
+For example: +
import narwhals as nw
+
+df.with_columns(
+ a_mean = nw.col('a').mean(),
+ a_std = nw.col('a').std(),
+)
+
DataFrame.with_columns
, narwhals.col
, Expr.mean
, and Expr.std
are
+all documented in the API reference.
+However, +
import narwhals as nw
+
+df.with_columns(
+ a_ewm_mean = nw.col('a').ewm_mean(alpha=.7),
+)
+
Expr.ewm_mean
only appears in the Polars API reference, but not in the Narwhals
+one.
+In general, you should expect any fundamental dataframe operation to be supported - if +one that you need is not, please do open a feature request!
+ + + + + + + + + + + + + +narwhals.LazyFrame
Representation of a Lazy computation graph/query against a DataFrame.
+This allows for whole-query optimisation in addition to parallelism, and +is the preferred (and highest-performance) mode of operation for narwhals.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
df |
+
+ Any
+ |
+
+
+
+ A pandas-like dataframe (Pandas, cuDF or Modin), a Polars dataframe, + a Polars lazyframe, a narwhals DataFrame or a narwhals LazyFrame. + |
+ + required + | +
is_polars |
+
+ bool
+ |
+
+
+
+ if set to `True`, assume the dataframe to be of Polars type. |
+
+ False
+ |
+
Initialising LazyFrame(...)
directly is equivalent to DataFrame(...).lazy()
.
Examples:
+Constructing a LazyFrame directly from a dictionary:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> data = {"a": [1, 2], "b": [3, 4]}
+>>> lf_pl = pl.LazyFrame(data)
+>>> lf = nw.LazyFrame(lf_pl)
+>>> dframe = lf.collect()
+>>> dframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(dframe)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 3 │
+│ 2 ┆ 4 │
+└─────┴─────┘
+
columns: list[str]
+
+
+ property
+
+
+Get column names.
+Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+... ).select("foo", "bar")
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lf.columns
+['foo', 'bar']
+
+ schema: dict[str, DType]
+
+
+ property
+
+
+Get a dict[column name, DType].
+ + + +Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lf.schema
+OrderedDict({'foo': Int64, 'bar': Float64, 'ham': String})
+
collect()
+
+Materialize this LazyFrame into a DataFrame.
+ + + +Returns:
+Type | +Description | +
---|---|
+ DataFrame
+ |
+
+
+
+ DataFrame + |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "a": ["a", "b", "a", "b", "b", "c"],
+... "b": [1, 2, 3, 4, 5, 6],
+... "c": [6, 5, 4, 3, 2, 1],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lf
+┌───────────────────────────────────────────────┐
+| Narwhals LazyFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> df = lf.group_by("a").agg(nw.all().sum()).collect()
+>>> df
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(df).sort("a")
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 4 ┆ 10 │
+│ b ┆ 11 ┆ 10 │
+│ c ┆ 6 ┆ 1 │
+└─────┴─────┴─────┘
+
drop(*columns)
+
+Remove columns from the LazyFrame.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*columns |
+
+ str | Iterable[str]
+ |
+
+
+
+ Names of the columns that should be removed from the + dataframe. Accepts column selector input. + |
+
+ ()
+ |
+
Examples:
+Drop a single column by passing the name of that column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lframe = lf.drop("ham").collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ f64 │
+╞═════╪═════╡
+│ 1 ┆ 6.0 │
+│ 2 ┆ 7.0 │
+│ 3 ┆ 8.0 │
+└─────┴─────┘
+
Use positional arguments to drop multiple columns.
+>>> lframe = lf.drop("foo", "ham").collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 1)
+┌─────┐
+│ bar │
+│ --- │
+│ f64 │
+╞═════╡
+│ 6.0 │
+│ 7.0 │
+│ 8.0 │
+└─────┘
+
filter(*predicates)
+
+Filter the rows in the LazyFrame based on a predicate expression.
+The original order of the remaining rows is preserved.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*predicates |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Expression that evaluates to a boolean Series. + |
+
+ ()
+ |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+... )
+
Filter on one condition:
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lframe = lf.filter(nw.col("foo") > 1).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
Filter on multiple conditions:
+>>> lframe = lf.filter((nw.col("foo") < 3) & (nw.col("ham") == "a")).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
Provide multiple filters using *args
syntax:
>>> lframe = lf.filter(
+... nw.col("foo") == 1,
+... nw.col("ham") == "a",
+... ).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+└─────┴─────┴─────┘
+
Filter on an OR condition:
+>>> lframe = lf.filter((nw.col("foo") == 1) | (nw.col("ham") == "c")).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (2, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 3 ┆ 8 ┆ c │
+└─────┴─────┴─────┘
+
group_by(*keys)
+
+Start a group by operation.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*keys |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to group by. Accepts expression input. Strings are +parsed as column names. + |
+
+ ()
+ |
+
Examples:
+Group by one column and call agg
to compute the grouped sum of
+another column.
>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "a": ["a", "b", "a", "b", "c"],
+... "b": [1, 2, 1, 3, 3],
+... "c": [5, 4, 3, 2, 1],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lframe = lf.group_by("a").agg(nw.col("b").sum()).collect().sort("a")
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ str ┆ i64 │
+╞═════╪═════╡
+│ a ┆ 2 │
+│ b ┆ 5 │
+│ c ┆ 3 │
+└─────┴─────┘
+
Group by multiple columns by passing a list of column names.
+>>> lframe = lf.group_by(["a", "b"]).agg(nw.max("c")).collect().sort(["a", "b"])
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (4, 3)
+┌─────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ str ┆ i64 ┆ i64 │
+╞═════╪═════╪═════╡
+│ a ┆ 1 ┆ 5 │
+│ b ┆ 2 ┆ 4 │
+│ b ┆ 3 ┆ 2 │
+│ c ┆ 3 ┆ 1 │
+└─────┴─────┴─────┘
+
head(n)
+
+Get the first n
rows.
Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of rows to return. + |
+ + required + | +
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "a": [1, 2, 3, 4, 5, 6],
+... "b": [7, 8, 9, 10, 11, 12],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lframe = lf.head(5).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (5, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+│ 4 ┆ 10 │
+│ 5 ┆ 11 │
+└─────┴─────┘
+>>> lframe = lf.head(2).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (2, 2)
+┌─────┬─────┐
+│ a ┆ b │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+└─────┴─────┘
+
join(other, *, how='inner', left_on, right_on)
+
+Add a join operation to the Logical Plan.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
other |
+
+ Self
+ |
+
+
+
+ Lazy DataFrame to join with. + |
+ + required + | +
how |
+
+ Literal['inner']
+ |
+
+
+
+ {'inner'} + Join strategy. +
|
+
+ 'inner'
+ |
+
left_on |
+
+ str | list[str]
+ |
+
+
+
+ Join column of the left DataFrame. + |
+ + required + | +
right_on |
+
+ str | list[str]
+ |
+
+
+
+ Join column of the right DataFrame. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Self
+ |
+
+
+
+ A new joined LazyFrame + |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6.0, 7.0, 8.0],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> other_lf_pl = pl.LazyFrame(
+... {
+... "apple": ["x", "y", "z"],
+... "ham": ["a", "b", "d"],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> other_lf = nw.LazyFrame(other_lf_pl)
+>>> lframe = lf.join(other_lf, left_on="ham", right_on="ham").collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (2, 4)
+┌─────┬─────┬─────┬───────┐
+│ foo ┆ bar ┆ ham ┆ apple │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str ┆ str │
+╞═════╪═════╪═════╪═══════╡
+│ 1 ┆ 6.0 ┆ a ┆ x │
+│ 2 ┆ 7.0 ┆ b ┆ y │
+└─────┴─────┴─────┴───────┘
+
rename(mapping)
+
+Rename column names.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
mapping |
+
+ dict[str, str]
+ |
+
+
+
+ Key value pairs that map from old name to new name, or a + function that takes the old name as input and returns the + new name. + |
+ + required + | +
If existing names are swapped (e.g. 'A' points to 'B' and 'B' + points to 'A'), Polars will block projection and predicate + pushdowns at this node.
+Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lframe = lf.rename({"foo": "apple"}).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 3)
+┌───────┬─────┬─────┐
+│ apple ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ i64 ┆ str │
+╞═══════╪═════╪═════╡
+│ 1 ┆ 6 ┆ a │
+│ 2 ┆ 7 ┆ b │
+│ 3 ┆ 8 ┆ c │
+└───────┴─────┴─────┘
+
select(*exprs, **named_exprs)
+
+Select columns from this LazyFrame.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to select, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, + other non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
**named_exprs |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to select, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Examples:
+Pass the name of a column to select that column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3],
+... "bar": [6, 7, 8],
+... "ham": ["a", "b", "c"],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lframe = lf.select("foo").collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 1)
+┌─────┐
+│ foo │
+│ --- │
+│ i64 │
+╞═════╡
+│ 1 │
+│ 2 │
+│ 3 │
+└─────┘
+
Multiple columns can be selected by passing a list of column names.
+>>> lframe = lf.select(["foo", "bar"]).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 6 │
+│ 2 ┆ 7 │
+│ 3 ┆ 8 │
+└─────┴─────┘
+
Multiple columns can also be selected using positional arguments instead of a +list. Expressions are also accepted.
+>>> lframe = lf.select(nw.col("foo"), nw.col("bar") + 1).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 2)
+┌─────┬─────┐
+│ foo ┆ bar │
+│ --- ┆ --- │
+│ i64 ┆ i64 │
+╞═════╪═════╡
+│ 1 ┆ 7 │
+│ 2 ┆ 8 │
+│ 3 ┆ 9 │
+└─────┴─────┘
+
Use keyword arguments to easily name your expression inputs.
+>>> lframe = lf.select(threshold=nw.col('foo')*2).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 1)
+┌───────────┐
+│ threshold │
+│ --- │
+│ i64 │
+╞═══════════╡
+│ 2 │
+│ 4 │
+│ 6 │
+└───────────┘
+
sort(by, *more_by, descending=False)
+
+Sort the LazyFrame by the given columns.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
by |
+
+ str | Iterable[str]
+ |
+
+
+
+ Column(s) to sort by. Accepts expression input. Strings are + parsed as column names. + |
+ + required + | +
*more_by |
+
+ str
+ |
+
+
+
+ Additional columns to sort by, specified as positional + arguments. + |
+
+ ()
+ |
+
descending |
+
+ bool | Sequence[bool]
+ |
+
+
+
+ Sort in descending order. When sorting by multiple + columns, can be specified per column by passing a + sequence of booleans. + |
+
+ False
+ |
+
Examples:
+Pass a single column name to sort by that column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "a": [1, 2, None],
+... "b": [6.0, 5.0, 4.0],
+... "c": ["a", "c", "b"],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lframe = lf.sort("a").collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ null ┆ 4.0 ┆ b │
+│ 1 ┆ 6.0 ┆ a │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
Sort by multiple columns by passing a list of columns.
+>>> lframe = lf.sort(["c", "a"], descending=True).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 2 ┆ 5.0 ┆ c │
+│ null ┆ 4.0 ┆ b │
+│ 1 ┆ 6.0 ┆ a │
+└──────┴─────┴─────┘
+
Or use positional arguments to sort by multiple columns in the same way.
+>>> lframe = lf.sort("c", "a", descending=[False, True]).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 3)
+┌──────┬─────┬─────┐
+│ a ┆ b ┆ c │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ str │
+╞══════╪═════╪═════╡
+│ 1 ┆ 6.0 ┆ a │
+│ null ┆ 4.0 ┆ b │
+│ 2 ┆ 5.0 ┆ c │
+└──────┴─────┴─────┘
+
unique(subset)
+
+Drop duplicate rows from this LazyFrame.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
subset |
+
+ str | list[str]
+ |
+
+
+
+ Column name(s) to consider when identifying duplicate rows.
+ If set to `None`, use all columns. |
+ + required + | +
Returns:
+Name | Type | +Description | +
---|---|---|
LazyFrame |
+ Self
+ |
+
+
+
+ LazyFrame with unique rows. + |
+
Examples:
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "foo": [1, 2, 3, 1],
+... "bar": ["a", "a", "a", "a"],
+... "ham": ["b", "b", "b", "b"],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lframe = lf.unique(None).collect().sort("foo")
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (3, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+│ 2 ┆ a ┆ b │
+│ 3 ┆ a ┆ b │
+└─────┴─────┴─────┘
+>>> lframe = lf.unique(subset=["bar", "ham"]).collect().sort("foo")
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (1, 3)
+┌─────┬─────┬─────┐
+│ foo ┆ bar ┆ ham │
+│ --- ┆ --- ┆ --- │
+│ i64 ┆ str ┆ str │
+╞═════╪═════╪═════╡
+│ 1 ┆ a ┆ b │
+└─────┴─────┴─────┘
+
with_columns(*exprs, **named_exprs)
+
+Add columns to this LazyFrame.
+Added columns will replace existing columns with the same name.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
*exprs |
+
+ IntoExpr | Iterable[IntoExpr]
+ |
+
+
+
+ Column(s) to add, specified as positional arguments. + Accepts expression input. Strings are parsed as column names, other + non-expression inputs are parsed as literals. + |
+
+ ()
+ |
+
**named_exprs |
+
+ IntoExpr
+ |
+
+
+
+ Additional columns to add, specified as keyword arguments. + The columns will be renamed to the keyword used. + |
+
+ {}
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
LazyFrame |
+ Self
+ |
+
+
+
+ A new LazyFrame with the columns added. + |
+
Creating a new LazyFrame using this method does not create a new copy of +existing data.
+Examples:
+Pass an expression to add it as a new column.
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lf_pl = pl.LazyFrame(
+... {
+... "a": [1, 2, 3, 4],
+... "b": [0.5, 4, 10, 13],
+... "c": [True, True, False, True],
+... }
+... )
+>>> lf = nw.LazyFrame(lf_pl)
+>>> lframe = lf.with_columns((nw.col("a") * 2).alias("2a")).collect()
+>>> lframe
+┌───────────────────────────────────────────────┐
+| Narwhals DataFrame |
+| Use `narwhals.to_native` to see native output |
+└───────────────────────────────────────────────┘
+>>> nw.to_native(lframe)
+shape: (4, 4)
+┌─────┬──────┬───────┬─────┐
+│ a ┆ b ┆ c ┆ 2a │
+│ --- ┆ --- ┆ --- ┆ --- │
+│ i64 ┆ f64 ┆ bool ┆ i64 │
+╞═════╪══════╪═══════╪═════╡
+│ 1 ┆ 0.5 ┆ true ┆ 2 │
+│ 2 ┆ 4.0 ┆ true ┆ 4 │
+│ 3 ┆ 10.0 ┆ false ┆ 6 │
+│ 4 ┆ 13.0 ┆ true ┆ 8 │
+└─────┴──────┴───────┴─────┘
+
narwhals
Here are the top-level functions available in Narwhals.
+ + +all()
+
+Instantiate an expression representing all columns, similar to polars.all
.
col(*names)
+
+Instantiate an expression, similar to polars.col
.
concat(items, *, how='vertical')
+
+from_native(native_dataframe, *, strict=True, eager_only=None, series_only=None, allow_series=None)
+
+Convert dataframe to Narwhals DataFrame, LazyFrame, or Series.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
native_dataframe |
+
+ Any
+ |
+
+
+
+ Raw dataframe from user. +Depending on the other arguments, input object can be: +
|
+ + required + | +
strict |
+
+ bool
+ |
+
+
+
+ Whether to raise if object can't be converted (default) or +to just leave it as-is. + |
+
+ True
+ |
+
eager_only |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow eager objects. + |
+
+ None
+ |
+
series_only |
+
+ bool | None
+ |
+
+
+
+ Whether to only allow series. + |
+
+ None
+ |
+
allow_series |
+
+ bool | None
+ |
+
+
+
+ Whether to allow series (default is only dataframe / lazyframe). + |
+
+ None
+ |
+
Returns:
+Type | +Description | +
---|---|
+ DataFrame | LazyFrame | Series
+ |
+
+
+
+ narwhals.DataFrame or narwhals.LazyFrame or narwhals.Series + |
+
len()
+
+Instantiate an expression representing the length of a dataframe, similar to polars.len
.
max(*columns)
+
+Instantiate an expression representing the maximum of one or more columns, similar to polars.max
.
mean(*columns)
+
+Instantiate an expression representing the mean of one or more columns, similar to polars.mean
.
min(*columns)
+
+Instantiate an expression representing the minimum of one or more columns, similar to polars.min
.
sum(*columns)
+
+Instantiate an expression representing the sum of one or more columns, similar to polars.sum
.
sum_horizontal(*exprs)
+
+Instantiate an expression representing the horizontal sum of one or more expressions, similar to polars.sum_horizontal
.
to_native(narwhals_object, *, strict=True)
+
+Convert Narwhals object to native one.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
narwhals_object |
+
+ LazyFrame | DataFrame | Series
+ |
+
+
+
+ Narwhals object. + |
+ + required + | +
strict |
+
+ bool
+ |
+
+
+
+ whether to raise on non-Narwhals input. + |
+
+ True
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Any
+ |
+
+
+
+ Object of class that user started with. + |
+
narwhals.Series
dtype: Any
+
+
+ property
+
+
+name: str
+
+
+ property
+
+
+shape: tuple[int]
+
+
+ property
+
+
+alias(name)
+
+all()
+
+any()
+
+cast(dtype)
+
+diff()
+
+Calculate the difference with the previous element, for each element.
+ +pandas may change the dtype here, for example when introducing missing
+values in an integer column. To ensure that the dtype doesn't change,
+you may want to use fill_null
and cast
. For example, to calculate
+the diff and fill missing values with 0
in an Int64 column, you could
+do:
s.diff().fill_null(0).cast(nw.Int64)
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> s = [2, 4, 3]
+>>> s_pd = pd.Series(s)
+>>> s_pl = pl.Series(s)
+
We define a dataframe-agnostic function:
+>>> def func(s_any):
+... s = nw.from_native(s_any, series_only=True)
+... s = s.diff()
+... return nw.to_native(s)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 NaN
+1 2.0
+2 -1.0
+dtype: float64
+>>> func(s_pl)
+shape: (3,)
+Series: '' [i64]
+[
+ null
+ 2
+ -1
+]
+
drop_nulls()
+
+filter(other)
+
+is_between(lower_bound, upper_bound, closed='both')
+
+is_in(other)
+
+is_null()
+
+max()
+
+mean()
+
+min()
+
+n_unique()
+
+sample(n=None, fraction=None, *, with_replacement=False)
+
+sort(*, descending=False)
+
+std(*, ddof=1)
+
+sum()
+
+to_numpy()
+
+to_pandas()
+
+unique()
+
+narwhals.Series.str
ends_with(suffix)
+
+head(n=5)
+
+Take the first n elements of each string.
+ + + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
n |
+
+ int
+ |
+
+
+
+ Number of elements to take. + |
+
+ 5
+ |
+
Examples:
+>>> import pandas as pd
+>>> import polars as pl
+>>> import narwhals as nw
+>>> lyrics = ['Atatata', 'taata', 'taatatata', 'zukkyun']
+>>> s_pd = pd.Series(lyrics)
+>>> s_pl = pl.Series(lyrics)
+
We define a dataframe-agnostic function:
+>>> def func(s_any):
+... s = nw.from_native(s_any, series_only=True)
+... s = s.str.head()
+... return nw.to_native(s)
+
We can then pass either pandas or Polars to func
:
>>> func(s_pd)
+0 Atata
+1 taata
+2 taata
+3 zukky
+dtype: object
+>>> func(s_pl)
+shape: (4,)
+Series: '' [str]
+[
+ "Atata"
+ "taata"
+ "taata"
+ "zukky"
+]
+