From 3ee7dc5064bd01e3186060f215eb091c15593fca Mon Sep 17 00:00:00 2001 From: Nwabueze Ugoh <126014542+brentomagic@users.noreply.github.com> Date: Mon, 6 May 2024 21:29:55 +0100 Subject: [PATCH] added unique() docstring --- narwhals/expression.py | 110 +++++++++++------------------------------ 1 file changed, 30 insertions(+), 80 deletions(-) diff --git a/narwhals/expression.py b/narwhals/expression.py index 9a9d89a21..281bd14b6 100644 --- a/narwhals/expression.py +++ b/narwhals/expression.py @@ -384,27 +384,31 @@ def sum(self) -> Expr: return self.__class__(lambda plx: self._call(plx).sum()) def min(self) -> Expr: + return self.__class__(lambda plx: self._call(plx).min()) + + def max(self) -> Expr: """ - Returns the minimum value(s) from a column(s). + Returns the maximum value(s) from a column(s). Examples: - >>> import pandas as pd >>> import polars as pl + >>> import pandas as pd >>> import narwhals as nw - >>> df_pd = pd.DataFrame({'a': [1, 2], 'b': [4, 3]}) - >>> df_pl = pl.DataFrame({'a': [1, 2], 'b': [4, 3]}) + >>> df_pd = pd.DataFrame({'a': [10, 20], 'b': [50, 100]}) + >>> df_pl = pl.DataFrame({'a': [10, 20], 'b': [50, 100]}) Let's define a dataframe-agnostic function: + >>> def func(df_any): - ... df = nw.from_native(df_any) - ... df = df.select(nw.min('a','b')) - ... return nw.to_native(df) + ... df = nw.from_native(df_any) + ... df = df.select(nw.max('a', 'b')) + ... return nw.to_native(df) We can then pass either pandas or Polars to `func`: >>> func(df_pd) - a b - 0 1 3 + a b + 0 20 100 >>> func(df_pl) shape: (1, 2) ┌─────┬─────┐ @@ -412,51 +416,51 @@ def min(self) -> Expr: │ --- ┆ --- │ │ i64 ┆ i64 │ ╞═════╪═════╡ - │ 1 ┆ 3 │ + │ 20 ┆ 100 │ └─────┴─────┘ """ + return self.__class__(lambda plx: self._call(plx).max()) - return self.__class__(lambda plx: self._call(plx).min()) + def n_unique(self) -> Expr: + return self.__class__(lambda plx: self._call(plx).n_unique()) - def max(self) -> Expr: + def unique(self) -> Expr: """ - Returns the maximum value(s) from a column(s). + Returns unique values Examples: >>> import polars as pl >>> import pandas as pd >>> import narwhals as nw - >>> df_pd = pd.DataFrame({'a': [10, 20], 'b': [50, 100]}) - >>> df_pl = pl.DataFrame({'a': [10, 20], 'b': [50, 100]}) + >>> df_pd = pd.DataFrame({'a': [1, 1, 3, 5, 5], 'b': [2, 2, 4, 6, 6]}) + >>> df_pl = pl.DataFrame({'a': [1, 1, 3, 5, 5], 'b': [2, 2, 4, 6, 6]}) Let's define a dataframe-agnostic function: >>> def func(df_any): ... df = nw.from_native(df_any) - ... df = df.select(nw.max('a', 'b')) + ... df = df.select(nw.col('a', 'b').unique()) ... return nw.to_native(df) We can then pass either pandas or Polars to `func`: >>> func(df_pd) - a b - 0 20 100 + a b + 0 1 2 + 1 3 4 + 2 5 6 >>> func(df_pl) - shape: (1, 2) + shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ │ --- ┆ --- │ │ i64 ┆ i64 │ ╞═════╪═════╡ - │ 20 ┆ 100 │ + │ 1 ┆ 2 │ + │ 3 ┆ 4 │ + │ 5 ┆ 6 │ └─────┴─────┘ """ - return self.__class__(lambda plx: self._call(plx).max()) - - def n_unique(self) -> Expr: - return self.__class__(lambda plx: self._call(plx).n_unique()) - - def unique(self) -> Expr: return self.__class__(lambda plx: self._call(plx).unique()) def sort(self, *, descending: bool = False) -> Expr: @@ -479,60 +483,6 @@ def filter(self, other: Any) -> Expr: ) def is_null(self) -> Expr: - """ - Returns a boolean Series indicating which values are null. - - Examples: - >>> import pandas as pd - >>> import polars as pl - >>> import narwhals as nw - >>> df_pd = pd.DataFrame( - ... { - ... 'a': [2, 4, None, 3, 5], - ... 'b': [2.0, 4.0, float("nan"), 3.0, 5.0] - ... } - ... ) - >>> df_pl = pl.DataFrame( - ... { - ... 'a': [2, 4, None, 3, 5], - ... 'b': [2.0, 4.0, float("nan"), 3.0, 5.0] - ... } - ... ) - - Let's define a dataframe-agnostic function: - - >>> def func(df_any): - ... df = nw.from_native(df_any) - ... df = df.with_columns( - ... a_is_null = nw.col('a').is_null(), - ... b_is_null = nw.col('b').is_null() - ... ) - ... return nw.to_native(df) - - We can then pass either pandas or Polars to `func`: - - >>> func(df_pd) - a b a_is_null b_is_null - 0 2.0 2.0 False False - 1 4.0 4.0 False False - 2 NaN NaN True True - 3 3.0 3.0 False False - 4 5.0 5.0 False False - - >>> func(df_pl) # nan != null for polars - shape: (5, 4) - ┌──────┬─────┬───────────┬───────────┐ - │ a ┆ b ┆ a_is_null ┆ b_is_null │ - │ --- ┆ --- ┆ --- ┆ --- │ - │ i64 ┆ f64 ┆ bool ┆ bool │ - ╞══════╪═════╪═══════════╪═══════════╡ - │ 2 ┆ 2.0 ┆ false ┆ false │ - │ 4 ┆ 4.0 ┆ false ┆ false │ - │ null ┆ NaN ┆ true ┆ false │ - │ 3 ┆ 3.0 ┆ false ┆ false │ - │ 5 ┆ 5.0 ┆ false ┆ false │ - └──────┴─────┴───────────┴───────────┘ - """ return self.__class__(lambda plx: self._call(plx).is_null()) # --- partial reduction ---