Merge pull request #288 from google/fix-tests-arithm-scalar

Fix tests arithm scalar
google · Oct 31, 2023 · bd9630f · bd9630f
2 parents c622379 + 29e9514
commit bd9630f
Show file tree

Hide file tree

Showing 31 changed files with 961 additions and 2,191 deletions.
diff --git a/docs/public_api_test.py b/docs/public_api_test.py
@@ -89,12 +89,6 @@
     "greater_scalar",
     "less_equal_scalar",
     "less_scalar",
-    # UNARY OPERATORS
-    "abs",
-    "log",
-    "invert",
-    "isnan",
-    "notnan",
 }
 
 

diff --git a/docs/src/reference/index.md b/docs/src/reference/index.md
diff --git a/docs/src/reference/temporian/operators/equal.md b/docs/src/reference/temporian/operators/equal.md
@@ -0,0 +1 @@
+::: temporian.EventSet.equal
diff --git a/docs/src/reference/temporian/operators/unary/abs.md b/docs/src/reference/temporian/operators/unary/abs.md
@@ -0,0 +1 @@
+::: temporian.EventSet.abs
diff --git a/docs/src/reference/temporian/operators/unary/invert.md b/docs/src/reference/temporian/operators/unary/invert.md
diff --git a/docs/src/reference/temporian/operators/unary/isnan.md b/docs/src/reference/temporian/operators/unary/isnan.md
@@ -0,0 +1 @@
+::: temporian.EventSet.isnan
diff --git a/docs/src/reference/temporian/operators/unary/log.md b/docs/src/reference/temporian/operators/unary/log.md
@@ -0,0 +1 @@
+::: temporian.EventSet.log
diff --git a/docs/src/reference/temporian/operators/unary/notnan.md b/docs/src/reference/temporian/operators/unary/notnan.md
@@ -0,0 +1 @@
+::: temporian.EventSet.notnan
diff --git a/docs/src/tutorials/anomaly_detection_unsupervised.ipynb b/docs/src/tutorials/anomaly_detection_unsupervised.ipynb
@@ -2829,19 +2829,19 @@
     "        alerts = (signal >= threshold).filter()[[]]\n",
     "\n",
     "        # False alerts\n",
-    "        false_alerts = tp.equal_scalar(anomalies.moving_count(anomaly_window, sampling=alerts), 0).filter()\n",
+    "        false_alerts = anomalies.moving_count(anomaly_window, sampling=alerts).equal(0).filter()\n",
     "        false_alerts = false_alerts.filter_moving_count(false_alert_shutoff)\n",
     "        num_false_alerts = false_alerts.moving_count(np.inf, sampling=end_of_records)\n",
     "\n",
     "        # Missed targets\n",
-    "        missed_anomalies = tp.equal_scalar(alerts.moving_count(anomaly_window, sampling=anomalies.lag(anomaly_window)), 0).filter().leak(anomaly_window)\n",
+    "        missed_anomalies = alerts.moving_count(anomaly_window, sampling=anomalies.lag(anomaly_window)).equal(0).filter().leak(anomaly_window)\n",
     "        missed_anomalies = missed_anomalies.filter_moving_count(false_alert_shutoff)\n",
     "        num_missed_anomalies = missed_anomalies.moving_count(np.inf, sampling=end_of_records)\n",
     "\n",
     "        # Time to detection\n",
     "        time_to_detection = anomalies.until_next(sampling=alerts, timeout=anomaly_window)\n",
     "        # Set the time to detection of non detected anomalies with the maximum anomaly_window.\n",
-    "        time_to_detection = tp.isnan(time_to_detection).where(anomaly_window, time_to_detection)\n",
+    "        time_to_detection = time_to_detection.isnan().where(anomaly_window, time_to_detection)\n",
     "        # Note: Some machines don't have alerts\n",
     "        sum_time_to_detection = time_to_detection.cumsum(sampling=end_of_records)\n",
     "\n",
@@ -2990,8 +2990,8 @@
    "source": [
     "The model is **much** better than the features alone. This is expected, but it is a good check to do :).\n",
     "\n",
-    "For example, the model is able to detect the anomalies within 200 time-units while raising ~8 false alerts.\r\n",
-    "By comparison, using `f1`or `f2`alone will generate ~55 false alerts for the same time to detection\r\n"
+    "For example, the model is able to detect the anomalies within 200 time-units while raising ~8 false alerts.\n",
+    "By comparison, using `f1` or `f2` alone will generate ~55 false alerts for the same time to detection.\n"
    ]
   },
   {

diff --git a/docs/src/tutorials/bank_fraud_detection_with_tfdf.ipynb b/docs/src/tutorials/bank_fraud_detection_with_tfdf.ipynb
@@ -522,7 +522,7 @@
     "dataset_tp.add_index(\"CUSTOMER_ID\").plot(indexes=\"3774\")\n",
     "\n",
     "# Same plot as:\n",
-    "# dataset_tp.filter(tp.equal_scalar(dataset_tp[\"CUSTOMER_ID\"], \"3774\")).plot()"
+    "# dataset_tp.filter(dataset_tp[\"CUSTOMER_ID\"].equal(\"3774\")).plot()"
    ]
   },
   {
@@ -850,7 +850,7 @@
     "train_test_switch_tp.plot()\n",
     "\n",
     "# All the transactions before the demarcating event are part of the training dataset (i.e. `is_train=True`) \n",
-    "is_train = tp.isnan(train_test_switch_tp.since_last(sampling=augmented_dataset_tp))\n",
+    "is_train = train_test_switch_tp.since_last(sampling=augmented_dataset_tp).isnan()\n",
     "is_test = ~is_train\n",
     "\n",
     "# Plot\n",

diff --git a/docs/src/tutorials/loan_outcomes_prediction.ipynb b/docs/src/tutorials/loan_outcomes_prediction.ipynb
diff --git a/docs/src/tutorials/m5_competition.ipynb b/docs/src/tutorials/m5_competition.ipynb
@@ -2284,7 +2284,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "tabular_test = tabular.filter(tp.equal_scalar(tabular[\"day\"], 1914))\n",
+    "tabular_test = tabular.filter(tabular[\"day\"].equal(1914))\n",
     "tabular_train = tabular.filter(\n",
     "    (tabular[\"day\"] >= 30) & (tabular[\"day\"] < 1914)\n",
     ")"

diff --git a/docs/src/user_guide.ipynb b/docs/src/user_guide.ipynb
@@ -1380,7 +1380,7 @@
     "lines_to_next_cell": 0
    },
    "source": [
-    "**Warning:** The Python equality operator (`==`) does not compute element-wise equality between features. Use the `tp.equal()` operator instead."
+    "**Warning:** The Python equality operator (`==`) does not compute element-wise equality between features. Use the `evset.equal()` operator instead."
    ]
   },
   {
@@ -1398,7 +1398,7 @@
    "outputs": [],
    "source": [
     "# Works element-wise as expected\n",
-    "tp.equal(node[\"f1\"], node[\"f3\"])"
+    "node[\"f1\"].equal(node[\"f3\"])"
    ]
   },
   {

diff --git a/temporian/__init__.py b/temporian/__init__.py
@@ -134,13 +134,6 @@
 from temporian.core.operators.scalar.relational_scalar import less_equal_scalar
 from temporian.core.operators.scalar.relational_scalar import less_scalar
 
-# Unary operators
-from temporian.core.operators.unary import invert
-from temporian.core.operators.unary import isnan
-from temporian.core.operators.unary import notnan
-from temporian.core.operators.unary import abs
-from temporian.core.operators.unary import log
-
 # Remove automatic file tree symbols from public API
 # pylint: disable=undefined-variable
 del proto

diff --git a/temporian/core/event_set_ops.py b/temporian/core/event_set_ops.py
@@ -85,9 +85,9 @@ def _raise_error(
         """
 
         raise ValueError(
-            f"Cannot {op_name} {self._clsname} and {type(other)} objects. Only"
-            f" {self._clsname} or values of type ({allowed_types}) are"
-            " supported."
+            f"Cannot use operator '{op_name}' on {self._clsname} and"
+            f" {type(other)} objects. Only {self._clsname} or values of type"
+            f" ({allowed_types}) are supported."
         )
 
     def __ne__(self, other: Any):
@@ -1156,6 +1156,75 @@ def enumerate(self: EventSetOrNode) -> EventSetOrNode:
 
         return enumerate(self)
 
+    def equal(self: EventSetOrNode, other: Any) -> EventSetOrNode:
+        """Checks element-wise equality of an [`EventSet`][temporian.EventSet]
+        to another one or to a single value.
+
+        When `other` is an EventSet, each feature is compared element-wise
+        to the feature of `other` in the same position.
+        Note that it will always return False on NaN elements.
+
+        Inputs must have the same sampling and the same number of features.
+
+        Example:
+            ```python
+            >>> a = tp.event_set(
+            ...     timestamps=[1, 2, 3],
+            ...     features={"f1": [0, 100, 200]}
+            ... )
+            >>> b = tp.event_set(
+            ...     timestamps=[1, 2, 3],
+            ...     features={"f2": [-10, 100, 5]},
+            ...     same_sampling_as=a
+            ... )
+
+            >>> # WARN: Don't use this for element-wise comparison
+            >>> a == b
+            False
+
+            >>> # Element-wise comparison to a scalar value
+            >>> c = a.equal(100)
+            >>> c
+            indexes: []
+            features: [('f1', bool_)]
+            events:
+                (3 events):
+                    timestamps: [1. 2. 3.]
+                    'f1': [False True False]
+            ...
+
+            >>> # Element-wise comparison between two EventSets
+            >>> c = a.equal(b)
+            >>> c
+            indexes: []
+            features: [('eq_f1_f2', bool_)]
+            events:
+                (3 events):
+                    timestamps: [1. 2. 3.]
+                    'eq_f1_f2': [False True False]
+            ...
+
+            ```
+
+        Args:
+            other: EventSet, or single int/float/bool/str value, to compare to.
+
+        Returns:
+            EventSet with boolean features.
+        """
+        if isinstance(other, self.__class__):  # same class as self -> binary op
+            from temporian.core.operators.binary import equal
+
+            return equal(input_1=self, input_2=other)
+
+        if isinstance(other, T_SCALAR + (bool, str)):  # single value -> scalar op
+            from temporian.core.operators.scalar import equal_scalar
+
+            return equal_scalar(input=self, value=other)
+
+        self._raise_error("equal", other, "int,float,bool,str")
+        assert False  # Not expected to be reached: _raise_error() raises ValueError.
+
     def experimental_fast_fourier_transform(
         self: EventSetOrNode,
         *,

diff --git a/temporian/core/operators/binary/relational.py b/temporian/core/operators/binary/relational.py
@@ -124,9 +124,7 @@ def equal(
         Result of the comparison.
     """
     assert isinstance(input_1, EventSetNode)
-
-    if not isinstance(input_2, EventSetNode):
-        return equal_scalar(input=input_1, value=input_2)  # type: ignore
+    assert isinstance(input_2, EventSetNode)
 
     return EqualOperator(
         input_1=input_1,

diff --git a/temporian/core/operators/scalar/base.py b/temporian/core/operators/scalar/base.py
@@ -85,9 +85,11 @@ def __init__(
             for feature in input.schema.features:
                 if feature.dtype not in self.map_vtype_dtype[type(value)]:
                     raise ValueError(
-                        f"Scalar has {type(value)=}, which can only operate"
-                        f" with dtypes: {self.map_vtype_dtype[type(value)]}. "
-                        f"But {feature.name} has dtype {feature.dtype}."
+                        f"Cannot add feature '{feature.name}'"
+                        f" (dtype {feature.dtype}) with value '{value}'"
+                        f" of type {type(value)}. Use cast() to convert the"
+                        f" feature to {self.map_vtype_dtype[type(value)]}"
+                        " first, or change the value type."
                     )
 
         # outputs

diff --git a/temporian/core/operators/test/BUILD b/temporian/core/operators/test/BUILD
@@ -186,8 +186,8 @@ py_test(
 )
 
 py_test(
-    name = "test_arithmetic_multi_index",
-    srcs = ["test_arithmetic_multi_index.py"],
+    name = "test_arithmetic",
+    srcs = ["test_arithmetic.py"],
     srcs_version = "PY3",
     deps = [
         "//temporian/implementation/numpy/data:io",
@@ -206,3 +206,58 @@ py_test(
         "//temporian/test:utils",
     ],
 )
+
+py_test(
+    name = "test_arithmetic_scalar",
+    srcs = ["test_arithmetic_scalar.py"],
+    srcs_version = "PY3",
+    deps = [
+        "//temporian/implementation/numpy/data:io",
+        # "//temporian/core/data:duration",
+        "//temporian/test:utils",
+    ],
+)
+
+py_test(
+    name = "test_logical",
+    srcs = ["test_logical.py"],
+    srcs_version = "PY3",
+    deps = [
+        "//temporian/implementation/numpy/data:io",
+        # "//temporian/core/data:duration",
+        "//temporian/test:utils",
+    ],
+)
+
+py_test(
+    name = "test_relational",
+    srcs = ["test_relational.py"],
+    srcs_version = "PY3",
+    deps = [
+        "//temporian/implementation/numpy/data:io",
+        # "//temporian/core/data:duration",
+        "//temporian/test:utils",
+    ],
+)
+
+py_test(
+    name = "test_relational_scalar",
+    srcs = ["test_relational_scalar.py"],
+    srcs_version = "PY3",
+    deps = [
+        "//temporian/implementation/numpy/data:io",
+        # "//temporian/core/data:duration",
+        "//temporian/test:utils",
+    ],
+)
+
+py_test(
+    name = "test_unary",
+    srcs = ["test_unary.py"],
+    srcs_version = "PY3",
+    deps = [
+        "//temporian/implementation/numpy/data:io",
+        # "//temporian/core/data:duration",
+        "//temporian/test:utils",
+    ],
+)
diff --git a/...ators/test/test_arithmetic_multi_index.py → ...an/core/operators/test/test_arithmetic.py b/...ators/test/test_arithmetic_multi_index.py → ...an/core/operators/test/test_arithmetic.py
@@ -20,9 +20,9 @@
 from temporian.test.utils import f32, f64, assertOperatorResult
 
 
-class ArithmeticMultiIndexTest(absltest.TestCase):
-    """Test numpy implementation of all arithmetic operators,
-    but using a two-level index and disordered rows."""
+class ArithmeticTest(absltest.TestCase):
+    """Test arithmetic operators between two event-sets
+    using a two-level index and disordered rows."""
 
     def setUp(self):
         # 2 index columns, 2 feature columns (float64 and float32)
@@ -123,6 +123,32 @@ def test_floordiv(self) -> None:
         )
         assertOperatorResult(self, self.evset_1 // self.evset_2, expected_evset)
 
+    def test_noindex_unsorted(self) -> None:
+        evset_1 = event_set(
+            timestamps=[2, 1, 0, 3],  # deliberately not sorted
+            features={
+                "f1": [2, 1, 0, 3],
+                "f2": [20, 10, 0, 30],
+            },
+        )
+        evset_2 = event_set(
+            timestamps=[3, 2, 1, 0],  # same timestamps, given in another order
+            features={
+                "f2": [30, 20, 10, 0],  # feature names swapped w.r.t. evset_1
+                "f1": [-3, -2, -1, 0],
+            },
+            same_sampling_as=evset_1,
+        )
+        expected = event_set(
+            timestamps=[0, 1, 2, 3],  # expectation written with sorted timestamps
+            features={
+                "add_f1_f2": [0, 11, 22, 33],  # evset_1 "f1" + evset_2 "f2" (by position)
+                "add_f2_f1": [0, 9, 18, 27],  # evset_1 "f2" + evset_2 "f1" (by position)
+            },
+            same_sampling_as=evset_1,
+        )
+        assertOperatorResult(self, evset_1 + evset_2, expected)
+
 
 if __name__ == "__main__":
     absltest.main()