From 39e7544d1f82f95e4e44e8cbf1ae4f3c29d3e3ad Mon Sep 17 00:00:00 2001 From: Liam Huber Date: Fri, 9 Aug 2024 09:52:33 -0700 Subject: [PATCH] [patch] Make pickle the default storage backend (#412) * Refactor storage test * Remove empty line * Rename workflow in tests With the introduction of a pickle backend, this test wound up failing. I haven't been able to work out exactly why, but since pickle is much faster my guess is that something in the test parallelization gets in a race condition with deleting a saved workflow from another test. Simply renaming it here solved the issue, which is an innocuous change. * Introduce `pickle` as a backend for saving * Fix root cause of storage conflict Ok, the reason workflow storage tests were failing after introducing pickle was that the original version of the test had the wrong order of the try/finally scope and the for-loop scope. I got away with it earlier because of interplay between the outer-most member of the loop and the default storage backend. Actually fixing the problem is as simple as ensuring the "finally delete" clause happens after _each_ loop step. This does that and reverts the renaming of the workflow. * Again, correctly order try/finally and for-loops * Remove keyword argument from pure-decorator You're only supposed to use it as a decorator to start with, so the kwarg was senseless * Add factory import source This is necessary (but not sufficient) to get `as_dataclass_node` decorated classes to pickle. The field name is a bit off compared to `Function` and `Macro`, as now we decorate a class definition instead of a function definition, but it's close. * Bring the dataclass node in line with function and macro * Leverage new pyiron_snippets.factory stuff to find the class * Mangle the stored dataclass qualname so it can be found later * Add tests * Update docs examples to reflect new naming * Update snippets dependency * [dependabot skip] Update env file * Use new pyiron_snippets syntax consistently * Expose `as_dataclass_node` in the API Now that it's pickling as well as anything else * [patch] Fall back on cloudpickle When the pickle backend fails * [minor] Make pickle the default storage backend * Format black * Fall back on loading any storage contents Regardless of what the specified storage backend was. * Format black --------- Co-authored-by: pyiron-runner --- pyiron_workflow/mixin/storage.py | 24 ++++++++++++++++----- pyiron_workflow/node.py | 4 ++-- tests/unit/test_node.py | 37 ++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 7 deletions(-) diff --git a/pyiron_workflow/mixin/storage.py b/pyiron_workflow/mixin/storage.py index 24d0abca..0c092f33 100644 --- a/pyiron_workflow/mixin/storage.py +++ b/pyiron_workflow/mixin/storage.py @@ -350,7 +350,15 @@ def load(self): Raises: TypeError: when the saved node has a different class name. """ - self.storage.load() + if self.storage.has_contents: + self.storage.load() + else: + # Check for saved content using any other backend + for backend in self.allowed_backends(): + interface = self._storage_interfaces()[backend](self) + if interface.has_contents: + interface.load() + break save.__doc__ += _save_load_warnings @@ -405,6 +413,13 @@ def storage(self) -> StorageInterface: raise ValueError(f"{self.label} does not have a storage backend set") return self._storage_interfaces()[self.storage_backend](self) + @property + def any_storage_has_contents(self): + return any( + self._storage_interfaces()[backend](self).has_contents + for backend in self.allowed_backends() + ) + @property def import_ready(self) -> bool: """ @@ -444,6 +459,9 @@ def _storage_interfaces(cls): interfaces["pickle"] = PickleStorage return interfaces + @classmethod + def default_backend(cls): + return "pickle" class HasH5ioStorage(HasStorage, ABC): @classmethod @@ -467,7 +485,3 @@ def to_storage(self, storage: TinybaseStorage): @abstractmethod def from_storage(self, storage: TinybaseStorage): pass - - @classmethod - def default_backend(cls): - return "tinybase" diff --git a/pyiron_workflow/node.py b/pyiron_workflow/node.py index 66762d15..b213cf06 100644 --- a/pyiron_workflow/node.py +++ b/pyiron_workflow/node.py @@ -331,7 +331,7 @@ def __init__( parent: Optional[Composite] = None, overwrite_save: bool = False, run_after_init: bool = False, - storage_backend: Literal["h5io", "tinybase", "pickle"] | None = "h5io", + storage_backend: Literal["h5io", "tinybase", "pickle"] | None = "pickle", save_after_run: bool = False, **kwargs, ): @@ -384,7 +384,7 @@ def _after_node_setup( self.delete_storage() do_load = False else: - do_load = sys.version_info >= (3, 11) and self.storage.has_contents + do_load = sys.version_info >= (3, 11) and self.any_storage_has_contents if do_load and run_after_init: raise ValueError( diff --git a/tests/unit/test_node.py b/tests/unit/test_node.py index a93a98e5..17a92227 100644 --- a/tests/unit/test_node.py +++ b/tests/unit/test_node.py @@ -497,6 +497,43 @@ def test_storage(self): hard_input.delete_storage() self.n1.delete_storage() + @unittest.skipIf(sys.version_info < (3, 11), "Storage will only work in 3.11+") + def test_storage_compatibility(self): + try: + self.n1.storage_backend = "tinybase" + self.n1.inputs.x = 42 + self.n1.save() + new_n1 = ANode(label=self.n1.label, storage_backend="pickle") + self.assertEqual( + new_n1.inputs.x.value, + self.n1.inputs.x.value, + msg="Even though the new node has a different storage backend, it " + "should still _load_ the data saved with a different backend. To " + "really avoid loading, delete or move the existing save file, or " + "give your new node a different label." + ) + new_n1() + new_n1.save() # With a different backend now + + tiny_n1 = ANode(label=self.n1.label, storage_backend="tinybase") + self.assertIs( + tiny_n1.outputs.y.value, + NOT_DATA, + msg="By explicitly specifying a particular backend, we expect to " + "recover that backend's save-file, even if it is outdated" + ) + + pick_n1 = ANode(label=self.n1.label, storage_backend="pickle") + self.assertEqual( + pick_n1.outputs.y.value, + new_n1.outputs.y.value, + msg="If we specify the more-recently-saved backend, we expect to load " + "the corresponding save file, where output exists" + ) + finally: + self.n1.delete_storage() + + @unittest.skipIf(sys.version_info < (3, 11), "Storage will only work in 3.11+") def test_save_after_run(self): for backend in Node.allowed_backends():