diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index e626df78..00babcfd 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -9,10 +9,12 @@ on: - main - develop - release-* + - spm # TODO: remove before merging pull_request: branches: - develop - release-* + - spm # TODO: remove before merging release: types: - published diff --git a/.gitignore b/.gitignore index 9bf84b23..8b3b8e2c 100644 --- a/.gitignore +++ b/.gitignore @@ -66,5 +66,6 @@ ipython_config.py .ruff_cache .vercel cryosparc/core.c +cryosparc/dataset/core.c *.dSYM cython_debug diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d21ad9f3..12b7c650 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,4 +27,5 @@ repos: rev: v1.1.391 hooks: - id: pyright - additional_dependencies: [cython, httpretty, numpy, pytest, setuptools] + additional_dependencies: + [cython, httpx, numpy, pydantic, pytest, setuptools] diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e5e4f66..e2c39776 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,43 @@ # Changelog +## Next + +- BREAKING: replaced low-level `CryoSPARC.cli`, `CryoSPARC.rtp` and `CryoSPARC.vis` attributes with single unified `CryoSPARC.api` +- BREAKING: When a `job.start()` or `job.run()` is called for an external job, changing the job connections with `job.add_input`, `job.add_output` or `job.connect` will now trigger an error. Please add all inputs and outputs and connect all inputs before running an external job. +- BREAKING: `CryoSPARC.download_asset(fileid, target)` no longer accepts a directory target. Must specify a filename. +- BREAKING: removed `CryoSPARC.get_job_specs()`. 
Use `CryoSPARC.job_register` instead +- BREAKING: `CryoSPARC.list_assets()` and `Job.list_assets()` return list of models instead of list of dictionaries, accessible with dot-notation + - OLD: `job.list_assets()[0]['filename']` + - NEW: `job.list_assets()[0].filename` +- BREAKING: `CryoSPARC.get_lanes()` now returns a list of models instead of dictionaries + - OLD: `cs.get_lanes()[0]['name']` + - NEW: `cs.get_lanes()[0].name` +- BREAKING: `CryoSPARC.get_targets` now returns a list of models instead of dictionaries + - OLD: `cs.get_targets()[0]['hostname']` + - NEW: `cs.get_targets()[0].hostname` + - Some top-level target attributes have also been moved into the `.config` attribute +- BREAKING: `CryoSPARC.print_job_types` `section` argument renamed to `category` + - OLD: `cs.print_job_types(section=["extraction", "refinement"])` + - NEW: `cs.print_job_types(category=["extraction", "refinement"])` +- BREAKING: Restructured schema for Job models, many `Job.doc` properties have been internally rearranged +- Added: `CryoSPARC.job_register` property +- Added: `job.load_input()` and `job.load_output()` now accept `"default"`, `"passthrough"` and `"all"` keywords for their `slots` argument +- Added: `job.alloc_output()` now accepts `dtype_params` argument for fields with dynamic shapes +- Added: `CryoSPARC.print_job_types` now includes a job stability column +- Added: `Job.print_output_spec` now includes a passthrough indicator column for results +- Updated: Improved type definitions +- Deprecated: When adding external inputs and outputs, expanded slot definitions now expect `"name"` key instead of `"prefix"`, support for which will be removed in a future release. 
+ - OLD: `job.add_input("particle", slots=[{"prefix": "component_mode_1", "dtype": "component", "required": True}])` + - NEW: `job.add_input("particle", slots=[{"name": "component_mode_1", "dtype": "component", "required": True}])` +- Deprecated: `license` argument no longer required when creating a `CryoSPARC` + instance, will be removed in a future release +- Deprecated: `external_job.stop()` now expects optional error string instead of boolean, support for boolean errors will be removed in a future release +- Deprecated: `CryoSPARC.get_job_sections()` will be removed in a future release, + use `CryoSPARC.job_register` instead +- Deprecated: Most functions no longer require a `refresh` argument, including + `job.set_param()`, `job.connect()`, `job.disconnect()` and `external_job.save_output()` +- Deprecated: Attributes `Project.doc`, `Workspace.doc` and `Job.doc` will be removed in a future release, use `.model` attribute instead + ## v4.6.1 - Added: Python 3.13 support diff --git a/MANIFEST.in b/MANIFEST.in index 6438bc9e..54ae229e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,7 +1,7 @@ -include cryosparc/core.pyx -include cryosparc/dataset.c -include cryosparc/dataset.pxd -include cryosparc/lz4.pxd +include cryosparc/dataset/core.pyx +include cryosparc/dataset/dataset.c +include cryosparc/dataset/dataset.pxd +include cryosparc/dataset/lz4.pxd include cryosparc/include/cryosparc-tools/dataset.h include cryosparc/include/lz4/lib/lz4.h include cryosparc/include/lz4/lib/lz4.c diff --git a/Makefile b/Makefile index fe331942..6bd78b90 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ PY_EXT_SUFFIX=$(shell python3 -c "import sysconfig; print(sysconfig.get_config_var('EXT_SUFFIX'))") -TARGET=cryosparc/core$(PY_EXT_SUFFIX) +TARGET=cryosparc/dataset/core$(PY_EXT_SUFFIX) all: $(TARGET) @@ -7,7 +7,7 @@ all: $(TARGET) # Primary build target # ----------------------------------------------------------------------------- -$(TARGET): cryosparc/include/cryosparc-tools/*.h 
cryosparc/dataset.c cryosparc/*.pyx cryosparc/*.pxd setup.py pyproject.toml +$(TARGET): cryosparc/include/cryosparc-tools/*.h cryosparc/dataset/dataset.c cryosparc/dataset/*.pyx cryosparc/dataset/*.pxd setup.py pyproject.toml python3 -m setup build_ext -i # ----------------------------------------------------------------------------- diff --git a/README.md b/README.md index 09d46e83..3852404a 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ See [CONTRIBUTING.md](CONTRIBUTING.md) for full details. ### Prerequisites - Git and Git LFS -- Python >= 3.7 +- Python >= 3.8 - Miniconda3 - C compiler such as GCC or Clang @@ -117,19 +117,16 @@ rm -rf cryosparc/*.so build dist *.egg-info Install dependencies into a new conda environment: ```sh -conda create -n cryosparc-tools-example -c conda-forge \ - python=3 numpy==1.18.5 \ - pyqt=5 libtiff wxPython=4.1.1 adwaita-icon-theme +conda create -n cryosparc-tools-example -c conda-forge python=3 numpy=1.18.5 \ + pyqt=5 libtiff wxPython=4.1.1 adwaita-icon-theme 'setuptools<66' # exclude these dependencies if you don't need cryolo conda activate cryosparc-tools-example -pip install -U pip -pip install nvidia-pyindex matplotlib~=3.4.0 pandas==1.1.4 notebook -pip install "cryolo[c11]" -pip install -e ".[build]" +pip install cryosparc-tools matplotlib~=3.4.0 pandas~=1.1.0 notebook +pip install nvidia-pyindex # exclude last two steps if you don't need cryolo +pip install 'cryolo[c11]' ``` Run the notebook server with the following environment variables: -- `CRYOSPARC_LICENSE_ID` with Structura-issued CryoSPARC license - `CRYOSPARC_EMAIL` with a CryoSPARC user account email - `CRYOSPARC_PASSWORD` with a CryoSPARC user account password @@ -137,7 +134,6 @@ You may also need to include `LD_LIBRARY_PATH` which includes the location of CUDA Toolkit and cuDNN runtime libraries (e.g., `~/miniconda3/envs/tools/lib/python3.8/site-packages/nvidia/*/lib`). 
import json
import re
import urllib.parse
import warnings
from contextlib import ExitStack, contextmanager
from enum import Enum
from typing import Any, Dict, Iterator, List, Optional, Tuple, TypedDict, Union

import httpx

from . import registry
from .errors import APIError
from .json_util import api_default, api_encode, api_object_hook
from .models.auth import Token
from .stream import Streamable

# JSON schema scalar type names that are returned from the API as-is.
_BASE_RESPONSE_TYPES = {"string", "integer", "number", "boolean"}

Auth = Union[str, Tuple[str, str]]
"""
Auth token or email/password.
"""


class APIRequest(TypedDict):
    """Keyword arguments forwarded to the ``httpx`` client for one request."""

    params: Dict[str, Any]
    headers: Dict[str, str]
    content: Any
    data: Optional[Dict[str, Any]]  # form data
    files: Optional[Dict[str, Any]]


class APINamespace:
    """
    Collection of API methods that call a certain namespace, e.g., only the
    methods under http://master:39004/pipelines/
    """

    _client: httpx.Client

    def __init__(self, http_client: httpx.Client):
        self._client = http_client

    def _set_headers(self, update: Dict[str, str]):
        """For testing only, reset client headers"""
        self._client.headers.update(update)

    def _prepare_request_stream(self, streamable):
        """Turn a ``Streamable`` into the request content via its ``stream()`` method."""
        return streamable.stream()

    def _construct_request(self, _path: str, _schema, *args, **kwargs) -> Tuple[str, APIRequest]:
        """
        Map python call arguments onto an HTTP request for one endpoint.

        Path parameters are consumed positionally (in the order they appear in
        the URI), query and header parameters are consumed from ``kwargs``, and
        the request body (if the endpoint declares one) is taken from the next
        positional argument or from the keyword named after the body schema
        title.

        Returns:
            tuple of the path with all path parameters substituted, and the
            ``APIRequest`` keyword arguments for the http client.

        Raises:
            TypeError: if a required argument is missing, an argument has the
                wrong type, or unknown/extra arguments are given.
        """
        args = list(args)
        query_params = {}
        func_name = _schema["summary"]
        headers = {}
        # HTTP header names are case-insensitive, compare in lower case
        client_headers = {h.lower() for h in self._client.headers.keys()}
        request_body = None
        data = None
        files = None

        for param_schema in sort_params_schema(_path, _schema.get("parameters", [])):
            # Compile function params
            param_name: str = param_schema["name"]
            param_in: str = param_schema["in"]
            assert param_in in (
                "path",
                "query",
                "header",
            ), f"[API] Param specification for '{param_name}' in {param_in} NOT supported."

            if param_in == "path" and args:
                # path param must be specified positionally
                param, args = args[0], args[1:]
                _path = _path.replace("{%s}" % param_name, _uriencode(param))
            elif param_in == "query" and param_name in kwargs and (value := kwargs.pop(param_name)) is not None:
                # query param must be in kwargs
                query_params[param_name] = api_encode(value)
            elif (
                param_in == "header"
                and (header_name := param_name.replace("-", "_")) in kwargs
                and (value := kwargs.pop(header_name)) is not None
            ):
                # header must be in kwargs
                headers[param_name] = api_encode(value)
            elif param_in == "header" and param_name.lower() in client_headers:
                pass  # in default headers, no action required
            elif param_schema.get("required", False):
                # "required" is optional in OpenAPI parameter objects and
                # defaults to false when omitted
                raise TypeError(f"[API] {func_name}() missing required argument: '{param_name}'")

        if "requestBody" in _schema:
            body_schema = _schema["requestBody"]
            # "required" lives on the requestBody object itself; its "content"
            # map is keyed by media type only.
            body_required = body_schema.get("required", False)
            content_schema = body_schema.get("content", {})
            if stream_mime_type := registry.first_streamable_mime(content_schema.keys()):
                body_name = _get_schema_param_name(content_schema[stream_mime_type]["schema"], "body")
                headers["Content-Type"] = stream_mime_type
                if args:
                    streamable, args = args[0], args[1:]
                elif body_name in kwargs:
                    streamable = kwargs.pop(body_name)
                elif body_required:
                    raise TypeError(f"[API] {func_name}() missing required argument: {body_name}")
                else:
                    streamable = None

                if streamable is not None:
                    if not isinstance(streamable, Streamable):
                        raise TypeError(f"[API] {func_name}() invalid argument {streamable}; expected Streamable type")
                    request_body = self._prepare_request_stream(streamable)
            elif "application/json" in content_schema:
                body_name = _get_schema_param_name(content_schema["application/json"]["schema"], "body")
                headers["Content-Type"] = "application/json"
                if args:
                    request_body, args = args[0], args[1:]
                    request_body = json.dumps(request_body, default=api_default)
                elif body_name in kwargs:
                    request_body = kwargs.pop(body_name)
                    request_body = json.dumps(request_body, default=api_default)
                elif body_required:
                    raise TypeError(f"[API] {func_name}() missing required argument: {body_name}")
            elif "application/x-www-form-urlencoded" in content_schema:
                assert kwargs, (
                    f"[API] {func_name}() requires x-www-form-urlencoded which "
                    "does not yet support positional arguments for the content body."
                )
                # all remaining keyword arguments become form fields
                data, kwargs = kwargs, {}
            elif "multipart/form-data" in content_schema:
                assert kwargs, (
                    f"[API] {func_name}() requires multipart/form-data which "
                    "does not yet support positional arguments for the content body."
                )
                # all remaining keyword arguments become multipart parts
                files, kwargs = kwargs, {}
            else:
                raise TypeError(f"[API] Does not yet support request body with content schema {content_schema}")

        # Fail if any extra parameters
        if args:
            raise TypeError(f"[API] {func_name}() given extra positional arguments ({', '.join(map(str, args))})")
        if kwargs:
            raise TypeError(f"[API] {func_name}() given unknown keyword arguments ({', '.join(kwargs.keys())})")

        return _path, APIRequest(params=query_params, headers=headers, content=request_body, data=data, files=files)

    def _handle_response(self, schema, res: httpx.Response):
        """
        Decode an HTTP response according to the endpoint's OpenAPI schema.

        The response schema is looked up by status code. Streamable media types
        are wrapped in their registered stream class, ``text/plain`` is returned
        as a string and ``application/json`` is decoded with
        ``_decode_json_response``. A response schema without content yields
        ``None``.

        Raises:
            httpx.HTTPStatusError: via ``raise_for_status()`` when the status
                code is an error and has no schema or no content.
            APIError: for status codes missing from the schema, JSON error
                responses (status >= 400) and unimplemented content types.
        """
        responses_schema = schema.get("responses", {})
        response_schema = responses_schema.get(str(res.status_code))
        if not response_schema:
            res.raise_for_status()
            raise APIError("Received unknown response", res=res)

        if "content" not in response_schema:
            res.raise_for_status()
            return None

        content_schema = response_schema["content"]
        stream_mime_type = registry.first_streamable_mime(content_schema.keys())
        if stream_mime_type is not None:  # This is a streaming type
            stream_class = registry.get_stream_class(stream_mime_type)
            assert stream_class
            return stream_class.from_iterator(
                res.iter_bytes(),
                media_type=res.headers.get("Content-Type", stream_mime_type),
            )
        elif "text/plain" in content_schema:
            return res.text
        elif "application/json" in content_schema:
            data = res.json(object_hook=api_object_hook)
            if res.status_code >= 400:
                raise APIError("Received error response", res=res, data=data)
            return _decode_json_response(data, content_schema["application/json"]["schema"])
        else:
            raise APIError(f"Received unimplemented response type in {content_schema.keys()}", res=res)

    @contextmanager
    def _request(self, method: str, url: str, *args, **kwargs) -> Iterator[httpx.Response]:
        """
        Send a request, retrying up to three times on transport errors, and
        yield the response.

        Only the send itself is retried. The ``yield`` is deliberately outside
        the ``try`` block: a ``@contextmanager`` generator may only yield once,
        so an exception raised by the caller's ``with`` body must propagate
        instead of triggering another attempt.

        Raises:
            httpx.TransportError: if the final attempt also fails.
        """
        max_attempts = 3
        attempt = 0
        while True:
            attempt += 1
            try:
                response = self._client.request(method, url, *args, **kwargs)
                break
            except httpx.TransportError as err:
                warnings.warn(
                    f"*** API client {method.upper()} {url} failed due to {err} (attempt {attempt}/{max_attempts})"
                )
                if attempt >= max_attempts:
                    raise
        yield response

    @contextmanager
    def _request_stream(self, method: str, url: str, *args, **kwargs) -> Iterator[httpx.Response]:
        """
        Open a streaming request, retrying up to three times on transport
        errors, and yield the open response. The response is closed when the
        ``with`` block exits.

        Only opening the stream is retried; errors raised while the caller
        consumes the response propagate unchanged (see ``_request``).

        Raises:
            httpx.TransportError: if the final attempt also fails.
        """
        # NOTE(review): if the request content is a one-shot iterator, a retry
        # may re-send a partially consumed body - confirm against Streamable.
        max_attempts = 3
        with ExitStack() as stack:
            attempt = 0
            while True:
                attempt += 1
                try:
                    # enter_context registers the stream for cleanup only if
                    # entering it succeeded
                    response = stack.enter_context(self._client.stream(method, url, *args, **kwargs))
                    break
                except httpx.TransportError as err:
                    warnings.warn(
                        f"*** API client {method.upper()} {url} stream failed due to {err} (attempt {attempt}/{max_attempts})"
                    )
                    if attempt >= max_attempts:
                        raise
            yield response

    def _call(self, _method: str, _path: str, _schema, *args, **kwargs):
        """Meta-call method that runs whenever a named function is called on
        this namespace"""
        responses_schema = _schema.get("responses", {})
        _path, req = self._construct_request(_path, _schema, *args, **kwargs)

        # Decide between a regular and a streaming request based on whether
        # the "201" response declares a streamable media type.
        # NOTE(review): only the "201" response schema is consulted here -
        # confirm that other success codes never need streaming.
        content_schema = responses_schema.get("201", {}).get("content", {})
        stream_mime_type = registry.first_streamable_mime(content_schema.keys())

        try:
            ctx = (
                self._request(_method, _path, **req)
                if stream_mime_type is None
                else self._request_stream(_method, _path, **req)
            )
            with ctx as res:
                return self._handle_response(_schema, res)
        except httpx.HTTPStatusError as err:
            raise APIError("received error response", res=err.response) from err


class APIClient(APINamespace):
    """Root API namespace interface for an OpenAPI server"""

    _namespace_class = APINamespace

    def __init__(
        self,
        base_url: Optional[str] = None,
        *,
        auth: Optional[Auth] = None,  # token or email/password
        headers: Optional[Dict[str, str]] = None,
        timeout: float = 300,
        http_client: Optional[httpx.Client] = None,
    ):
        """
        Create a client from either a ``base_url`` (a new ``httpx.Client`` is
        created with the given ``headers`` and ``timeout``) or an existing
        ``http_client``, then load the server's OpenAPI schema.

        Raises:
            TypeError: if both or neither of ``base_url`` and ``http_client``
                are given.
        """
        if base_url and http_client:
            raise TypeError(f"Cannot specify both base_url ({base_url}) and http_client ({http_client})")
        if not http_client:
            if not base_url:
                raise TypeError(f"Must specify either base_url ({base_url}) or http_client ({http_client})")
            http_client = httpx.Client(base_url=base_url, timeout=timeout)
            http_client.headers.update(headers)
        super().__init__(http_client)
        self._attrs = set()
        self(auth=auth)  # query the OpenAPI server and populate endpoint functions

    def __del__(self):
        # Clean up client when it gets garbage collected. The attribute is
        # missing when __init__ raised before the client was assigned.
        client = getattr(self, "_client", None)
        if client is not None:
            client.close()

    def __call__(self, auth: Optional[Auth] = None):
        """
        Re-generate all routes and internal namespaces. Optionally accepts
        either an authentication bearer token or an email/password tuple. Note
        that the password should be encoded in sha256.
        """
        self._reset()
        try:
            with self._request("get", "/openapi.json") as res:
                res.raise_for_status()
                schema = res.json()
        except json.JSONDecodeError as e:
            raise ValueError("Error reading JSON response") from e
        self._process_schema(schema)
        if auth:
            self._authorize(auth)
        return schema

    def _reset(self):
        """Remove every endpoint function and namespace previously registered on this client."""
        for attr in self._attrs:
            delattr(self, attr)
        self._attrs.clear()

    def _process_schema(self, schema):
        """Validate the top-level OpenAPI schema and register one function per path/method."""
        assert isinstance(schema, dict), "[API] Invalid OpenAPI schema response: Not a dictionary"
        for key in {"info", "paths", "components"}:
            assert key in schema, (
                f"[API] Invalid OpenAPI schema response: Missing '{key}' key, got keys {list(schema.keys())}"
            )

        for path, path_schema in schema["paths"].items():
            for method, endpoint_schema in path_schema.items():
                self._register_endpoint(method, path, endpoint_schema)

    def _register_endpoint(self, method: str, path: str, schema):
        """
        CryoSPARC's OpenAPI server is configured such that the "summary"
        property in the provided endpoint schemas at /openapi.json is the name
        of the function in python. '.' used to delimit namespaced endpoints.
        """
        namespace = self
        func_name: str = schema["summary"]
        if "." in func_name:
            namespace_name, func_name = func_name.split(".", 1)
            namespace = self._get_namespace(namespace_name)
        else:
            # top-level functions are tracked so _reset() can remove them
            self._attrs.add(func_name)

        setattr(namespace, func_name, self._generate_endpoint(func_name, namespace, method, path, schema))

    def _generate_endpoint(self, func_name: str, namespace, method: str, path: str, schema):
        """Build the python function that performs ``method path`` through ``namespace._call``."""

        def endpoint(*args, **kwargs):
            return namespace._call(method, path, schema, *args, **kwargs)

        endpoint.__name__ = func_name
        return endpoint

    def _get_namespace(self, name: str):
        """Return the namespace object stored under ``name``, creating it on first use."""
        if not hasattr(self, name):
            setattr(self, name, self._namespace_class(self._client))
            self._attrs.add(name)
        namespace = getattr(self, name)
        assert isinstance(namespace, self._namespace_class), (
            f"{self} name conflict with namespace '{name}'. This is likely a bug"
        )
        return namespace

    def _authorize(self, auth: Auth):
        """
        Set the client's ``Authorization`` header, either directly from a
        bearer token string or by logging in with an email/password tuple.
        """
        token = (
            Token(access_token=auth, token_type="bearer")
            if isinstance(auth, str)
            else self.login(grant_type="password", username=auth[0], password=auth[1])  # type: ignore
        )
        self._client.headers["Authorization"] = f"{token.token_type.title()} {token.access_token}"


def sort_params_schema(path: str, param_schema: List[dict]):
    """
    Sort the OpenAPI endpoint parameters schema in order that path params appear
    in the given URI.
    """
    path_params = {p["name"]: p for p in param_schema if p["in"] == "path"}
    known_path_params = re.findall(r"{([^}]*)}", path)
    return [path_params[name] for name in known_path_params] + [p for p in param_schema if p["in"] != "path"]


def _get_schema_param_name(schema: dict, default: str = "param") -> str:
    """
    Given a parameter schema, convert its title to a valid python argument
    identifier. Used to determine kwarg name of body arguments.
    """
    return schema.get("title", default).lower().replace(" ", "_")


def _matches_schema_type(value: Any, schema: dict) -> bool:
    """
    Coarse check of whether ``value`` could be an instance of ``schema``. Used
    to pick candidate members of an ``anyOf`` union before decoding.

    A ``$ref`` to a registered model always matches; scalar schema types match
    any scalar value.
    """
    if "$ref" in schema:
        model_class = registry.model_for_ref(schema["$ref"])
        if model_class:
            return True
    schema_type = schema.get("type")
    if schema_type == "object":
        return isinstance(value, dict)
    if schema_type == "array":
        return isinstance(value, list)
    if schema_type in _BASE_RESPONSE_TYPES:
        return isinstance(value, (str, float, int, bool))
    return False


def _decode_json_response(value: Any, schema: dict):
    """
    Convert a JSON-decoded response ``value`` into python objects following
    its OpenAPI ``schema``: registered ``$ref`` models are instantiated, arrays
    and objects are decoded recursively and everything else is returned as-is.
    """
    # Check for empty schema/value, means just return JSON-decoded value as-is.
    # JSON schema also allows boolean schemas (e.g., additionalProperties:
    # true), which carry no type information either.
    if value is None or not schema or not isinstance(schema, dict):
        return value

    # Don't attempt parsing unions without a model
    if "anyOf" in schema:
        for subschema in schema["anyOf"]:
            if _matches_schema_type(value, subschema):
                try:
                    return _decode_json_response(value, subschema)
                except (TypeError, ValueError):
                    continue
        warnings.warn("[API] Warning: No union schemas matched response. Returning API result as plain object.")
        return value

    # Check for schema that links to one of our existing models
    if "$ref" in schema:
        model_class = registry.model_for_ref(schema["$ref"])
        if model_class and issubclass(model_class, Enum):
            return model_class(value)
        elif model_class and issubclass(model_class, dict):  # typed dict
            return model_class(**value)
        elif model_class:  # pydantic model
            # use model_validate in case validator result derives from subtype, e.g., Event model
            return model_class.model_validate(value)  # type: ignore
        warnings.warn(
            f"[API] Warning: Received API response with unregistered schema type {schema['$ref']}. "
            "Returning as plain object."
        )
        return value

    # Check for Base case types
    if "type" in schema and schema["type"] in _BASE_RESPONSE_TYPES:
        return value

    # Recursively decode list or tuple
    if "type" in schema and schema["type"] == "array":
        if "prefixItems" in schema:
            # Tuple: "prefixItems" holds one schema per position; any extra
            # elements fall back to the "items" schema, if given.
            prefix_schemas = schema["prefixItems"]
            extra_schema = schema.get("items", {})
            return tuple(
                _decode_json_response(item, prefix_schemas[i] if i < len(prefix_schemas) else extra_schema)
                for i, item in enumerate(value)
            )
        item_schema = schema.get("items", {})
        return [_decode_json_response(item, item_schema) for item in value]

    # Recursively decode object
    if "type" in schema and schema["type"] == "object":
        prop_schemas = schema.get("properties", {})
        default_prop_schema = schema.get("additionalProperties", {})
        return {
            key: _decode_json_response(val, prop_schemas.get(key, default_prop_schema)) for key, val in value.items()
        }

    # No other result found, return as plain JSON
    return value


def _uriencode(val: Any):
    # Encode any string-compatible value so that it may be used in a URI.
    # Note that urllib.parse.quote leaves "/" unescaped by default.
    return urllib.parse.quote(val if isinstance(val, (str, bytes)) else str(val))
import JobRegister +from .models.job_spec import Category, InputSpec, InputSpecs, OutputResult, OutputSpec, OutputSpecs +from .models.license import LicenseInstance, UpdateTag +from .models.notification import Notification +from .models.project import GenerateIntermediateResultsSettings, Project, ProjectSymlink +from .models.scheduler_lane import SchedulerLane +from .models.scheduler_target import Cluster, Node, SchedulerTarget +from .models.service import LoggingService, ServiceLogLevel +from .models.session import DataManagementStats, ExposureGroup, ExposureGroupUpdate, LiveComputeResources, Session +from .models.session_config_profile import SessionConfigProfile, SessionConfigProfileBody +from .models.session_params import LiveAbinitParams, LiveClass2DParams, LivePreprocessingParams, LiveRefineParams +from .models.tag import Tag +from .models.user import User +from .models.workspace import Workspace +from .stream import Stream + +Auth = Union[str, Tuple[str, str]] +""" +Auth token or email/password. +""" + +class APINamespace: + def __init__(self, http_client: Any = None) -> None: ... + +class ConfigNamespace(APINamespace): + """ + Methods available in api.config, e.g., api.config.get_instance_uid(...) + """ + def get_instance_uid(self) -> str: + """ + Gets this CryoSPARC instance's unique UID. + """ + ... + def generate_new_instance_uid(self, *, force_takeover_projects: bool = False) -> str: + """ + Generates a new uid for the CryoSPARC instance + If force_takeover_projects is True, overwrites existing lockfiles, + otherwise if force_takeover_projects is False, only creates lockfile in projects that don't already have one + """ + ... + def set_default_job_priority(self, value: int) -> Any: + """ + Job priority + """ + ... + def get_version(self) -> str: + """ + Gets the current CryoSPARC version (with patch suffix, if available) + """ + ... + def get_system_info(self) -> dict: + """ + System information related to the CryoSPARC application + """ + ... 
+ def get(self, name: str, /, *, default: Any = "<>") -> Any: + """ + Gets config collection entry value for the given variable name. + """ + ... + def write(self, name: str, /, value: Any = ..., *, set_on_insert_only: bool = False) -> Any: + """ + Sets config collection entry. Specify `set_on_insert_only` to prevent + overwriting when the value already exists. + """ + ... + +class InstanceNamespace(APINamespace): + """ + Methods available in api.instance, e.g., api.instance.get_update_tag(...) + """ + def get_update_tag(self) -> UpdateTag | None: + """ + Gets information about updating to the next CryoSPARC version, if one is available. + """ + ... + def live_enabled(self) -> bool: + """ + Checks if CryoSPARC Live is enabled + """ + ... + def ecl_enabled(self) -> bool: + """ + Checks if embedded CryoSPARC Live is enabled + """ + ... + def link_log( + self, + type: str, + /, + data: Any = ..., + *, + user_id: Optional[str] = ..., + project_uid: Optional[str] = ..., + job_uid: Optional[str] = ..., + job_type: Optional[str] = ..., + ) -> None: ... + def get_license_usage(self) -> List[LicenseInstance]: ... + def browse_files(self, *, abs_path_glob: str) -> BrowseFileResponse: + """ + Backend for the file browser in the cryosparc UI. + .. note:: + abs_path_glob could have shell vars in it (i.e. $HOME, $SCRATCH) + 0. expand vars + 1. if abs path is already a dir: just list the dir + 2. else: expand the glob + 3. if the glob returns empty: return empty + """ + ... + def get_service_log( + self, + service: LoggingService, + /, + *, + days: int = 7, + date: Optional[str] = ..., + log_name: str = "", + func_name: str = "", + level: Optional[ServiceLogLevel] = ..., + max_lines: Optional[int] = ..., + ) -> str: + """ + Gets cryosparc service logs, filterable by date, name, function, and level + """ + ... + def get_runtime_diagnostics(self) -> RuntimeDiagnostics: + """ + Gets runtime diagnostics for the CryoSPARC instance + """ + ... 
+ +class CacheNamespace(APINamespace): + """ + Methods available in api.cache, e.g., api.cache.get(...) + """ + def get(self, key: str, /, *, namespace: Optional[str] = ...) -> Any: + """ + Returns None if the value is not set or expired + """ + ... + def set(self, key: str, /, value: Any = ..., *, namespace: Optional[str] = ..., ttl: int = 60) -> None: + """ + Sets key to the given value, with a ttl (Time-to-Live) in seconds + """ + ... + +class UsersNamespace(APINamespace): + """ + Methods available in api.users, e.g., api.users.admin_exists(...) + """ + def admin_exists(self) -> bool: + """ + Returns True if there exists at least one user with admin privileges, False + otherwise + """ + ... + def count(self, *, role: Optional[Literal["user", "admin"]] = ...) -> int: ... + def table(self) -> str: + """ + Show a table of all CryoSPARC user accounts + """ + ... + def me(self) -> User: + """ + Returns the current user + """ + ... + def find_one(self, user_id: str, /) -> User: + """ + Finds a user with a matching user ID or email + """ + ... + def update( + self, + user_id: str, + /, + *, + email: Optional[str] = ..., + username: Optional[str] = ..., + first_name: Optional[str] = ..., + last_name: Optional[str] = ..., + ) -> User: + """ + Updates a user's general details. other params will only be set if they are + not empty. + """ + ... + def delete(self, user_id: str, /) -> None: + """ + Removes a user from the CryoSPARC. Only authenticated admins may do this. + """ + ... + def get_role(self, user_id: str, /) -> Literal["user", "admin"]: + """ + Returns "admin" if the user has admin privileges, "user" otherwise. + """ + ... + def create( + self, + password: Optional[SHA256Password] = ..., + *, + email: str, + username: str, + first_name: str, + last_name: str, + role: Literal["user", "admin"] = "user", + ) -> User: + """ + Creates a new CryoSPARC user account. Specify ``created_by_user_id`` as the + id of user who is creating the new user. 
+ + The password is expected as a SHA256 hash. + """ + ... + def request_reset_password(self, user_id: str, /) -> None: + """ + Generates a password reset token for a user with the given email. The token + will appear in the Admin > User Management interface. + """ + ... + def register(self, user_id: str, /, body: SHA256Password, *, token: str) -> None: + """ + Registers user with a token (unauthenticated). + """ + ... + def reset_password(self, user_id: str, /, body: SHA256Password, *, token: str) -> None: + """ + Resets password function with a token (unauthenticated). password is expected + as a sha256 hash. + """ + ... + def set_role(self, user_id: str, /, role: Literal["user", "admin"]) -> User: + """ + Changes a user's from between "user" and "admin". Only admins may do this. + This revokes all access tokens for the given used ID. + """ + ... + def get_my_state_var(self, key: str, /) -> Any: + """ + Retrieves a user's state variable such as "licenseAccepted" or + "recentProjects" + """ + ... + def set_allowed_prefix_dir(self, user_id: str, /, allowed_prefix: str) -> User: + """ + Sets directories that users are allowed to query from the file browser. + ``allowed_prefix`` is the path of the directory the user can query inside. + (must start with "/", and must be an absolute path) + Returns True if successful + """ + ... + def get_state_var(self, user_id: str, key: str, /) -> Any: + """ + Retrieves a given user's state variable such as "licenseAccepted" or + "recentProjects" + """ + ... + def set_state_var(self, user_id: str, key: str, /, value: Any) -> User: + """ + Sets a property of the user's state + """ + ... + def unset_state_var(self, user_id: str, key: str, /) -> User: + """ + Deletes a property of the user's state + """ + ... + def get_lanes(self, user_id: str, /) -> List[str]: + """ + Gets the lanes a user has access to + """ + ... 
+ def set_lanes(self, user_id: str, /, lanes: List[str]) -> User: + """ + Restrict lanes the given user ID may to queue to. Only admins and account + owners may access this function. + """ + ... + +class ResourcesNamespace(APINamespace): + """ + Methods available in api.resources, e.g., api.resources.find_lanes(...) + """ + def find_lanes(self) -> List[SchedulerLane]: + """ + Finds lanes that are registered with the master scheduler. + """ + ... + def add_lane(self, body: SchedulerLane) -> SchedulerLane: + """ + Adds a new lane to the master scheduler. + """ + ... + def find_lane(self, name: str, /, *, type: Literal["node", "cluster", None] = ...) -> SchedulerLane: + """ + Finds a lane registered to the master scheduler with a given name and optional type. + """ + ... + def remove_lane(self, name: str, /) -> None: + """ + Removes the specified lane and any targets assigned under the lane in the + master scheduler. + """ + ... + def find_targets(self, *, lane: Optional[str] = ...) -> List[SchedulerTarget]: + """ + Finds a list of targets that are registered with the master scheduler. + """ + ... + def find_nodes(self, *, lane: Optional[str] = ...) -> List[SchedulerTarget[Node]]: + """ + Finds a list of targets with type "node" that are registered with the master scheduler. + These correspond to discrete worker hostname accessible over SSH. + """ + ... + def add_node(self, body: SchedulerTarget[Node]) -> SchedulerTarget[Node]: + """ + Adds a node or updates an existing node. Updates existing node if they share + share the same name. + """ + ... + def find_clusters(self, *, lane: Optional[str] = ...) -> List[SchedulerTarget[Cluster]]: + """ + Finds a list of targets with type "cluster" that are registered with the master scheduler. + These are multi-node clusters managed by workflow managers like SLURM or PBS and are accessible via submission script. + """ + ... 
+ def add_cluster(self, body: SchedulerTarget[Cluster]) -> SchedulerTarget[Cluster]: + """ + Adds a cluster or updates an existing cluster. Updates existing cluster if + they share the same name. + """ + ... + def find_target_by_hostname(self, hostname: str, /) -> SchedulerTarget: + """ + Finds a target with a given hostname. + """ + ... + def find_target_by_name(self, name: str, /) -> SchedulerTarget: + """ + Finds a target with a given name. + """ + ... + def find_node(self, name: str, /) -> SchedulerTarget[Node]: + """ + Finds a node with a given name. + """ + ... + def remove_node(self, name: str, /) -> None: + """ + Removes a target worker node from the master scheduler + """ + ... + def find_cluster(self, name: str, /) -> SchedulerTarget[Cluster]: + """ + Finds a cluster with a given name. + """ + ... + def remove_cluster(self, name: str, /) -> None: + """ + Removes the specified cluster/lane and any targets assigned under the lane + in the master scheduler + + Note: This will remove any worker node associated with the specified cluster/lane. + """ + ... + def find_cluster_script(self, name: str, /) -> str: + """ + Finds the cluster script for a cluster with a given name. + """ + ... + def find_cluster_template_vars(self, name: str, /) -> List[str]: + """ + Computes and retrieves all variable names defined in cluster templates. + """ + ... + def find_cluster_template_custom_vars(self, name: str, /) -> List[str]: + """ + Computes and retrieves all custom variable names defined in cluster templates + (i.e., all variables not in the internal list of known variable names). + """ + ... + def update_node_lane(self, name: str, /, lane: str) -> SchedulerTarget[Node]: + """ + Changes the lane on the given target (assumed to exist). Target type must + match lane type. + """ + ... + def refresh_nodes(self) -> Any: + """ + Asynchronously access target worker nodes. Load latest CPU, RAM and GPU + info. + """ + ...
+ def verify_cluster(self, name: str, /) -> str: + """ + Ensures cluster has been properly configured by executing a generic 'info' + command + """ + ... + def update_cluster_custom_vars(self, name: str, /, value: Dict[str, str]) -> SchedulerTarget[Cluster]: + """ + Changes the custom cluster variables on the given target (assumed to exist) + """ + ... + def update_target_cache_path(self, name: str, /, value: Optional[str]) -> SchedulerTarget: + """ + Changes the cache path on the given target (assumed to exist) + """ + ... + +class AssetsNamespace(APINamespace): + """ + Methods available in api.assets, e.g., api.assets.find(...) + """ + def find(self, *, project_uid: Optional[str] = ..., job_uid: Optional[str] = ...) -> List[GridFSFile]: + """ + List assets associated with projects or jobs on the given instance. + Typically returns files created during job runs, including plots and metadata. + """ + ... + def upload( + self, + project_uid: str, + job_uid: str, + /, + stream: Stream, + *, + filename: Optional[str] = ..., + format: Union[ + Literal["txt", "csv", "html", "json", "xml", "bild", "bld", "log"], + Literal["pdf", "gif", "jpg", "jpeg", "png", "svg"], + None, + ] = ..., + ) -> GridFSAsset: + """ + Upload a new asset associated with the given project/job. When calling + via HTTP, provide the contents of the file in the request body. At least + one of filename or format must be provided. + """ + ... + def download(self, id: str = "000000000000000000000000", /) -> Stream: + """ + Download the asset with the given ID. When calling via HTTP, file contents + will be in the response body. + """ + ... + def find_one(self, id: str = "000000000000000000000000", /) -> GridFSFile: + """ + Retrieve the full details for an asset with the given ID. + """ + ... + +class JobsNamespace(APINamespace): + """ + Methods available in api.jobs, e.g., api.jobs.find(...)
+ """ + def find( + self, + *, + sort: str = "created_at", + order: Literal[1, -1] = 1, + project_uid: Optional[List[str]] = ..., + workspace_uid: Optional[List[str]] = ..., + uid: Optional[List[str]] = ..., + type: Optional[List[str]] = ..., + status: Optional[List[JobStatus]] = ..., + category: Optional[List[Category]] = ..., + created_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + updated_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + queued_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + completed_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + killed_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + started_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + exported_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + deleted: Optional[bool] = False, + ) -> List[Job]: + """ + Finds all jobs that match the supplied query + """ + ... + def delete_many(self, project_job_uids: List[Tuple[str, str]], *, force: bool = False) -> None: + """ + Deletes the given jobs. Ignores protected jobs if `force` is `True`. + """ + ... 
+ def count( + self, + *, + project_uid: Optional[List[str]] = ..., + workspace_uid: Optional[List[str]] = ..., + uid: Optional[List[str]] = ..., + type: Optional[List[str]] = ..., + status: Optional[List[JobStatus]] = ..., + category: Optional[List[Category]] = ..., + created_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + updated_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + queued_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + completed_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + killed_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + started_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + exported_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + deleted: Optional[bool] = False, + ) -> int: + """ + Counts number of jobs that match the supplied query. + """ + ... + def get_active_count(self) -> int: + """ + Counts number of active jobs. + """ + ... + def find_in_project(self, project_uid: str, /, *, sort: str = "created_at", order: Literal[1, -1] = 1) -> List[Job]: + """ + Finds all jobs in project. + """ + ... + def clone_many( + self, + project_uid: str, + /, + job_uids: List[str], + *, + workspace_uid: Optional[str] = ..., + new_workspace_title: Optional[str] = ..., + ) -> List[Job]: + """ + Clones the given list of jobs. If any jobs are related, it will try to + re-create the input connections between the cloned jobs (but maintain the + same connections to jobs that were not cloned) + """ + ... + def get_chain(self, project_uid: str, /, *, start_job_uid: str, end_job_uid: str) -> List[str]: + """ + Finds the chain of jobs between start job to end job. + A job chain is the intersection of the start job's descendants and the end job's + ancestors. + """ + ... 
+ def clone_chain( + self, + project_uid: str, + /, + *, + start_job_uid: str, + end_job_uid: str, + workspace_uid: Optional[str] = ..., + new_workspace_title: Optional[str] = ..., + ) -> List[Job]: + """ + Clones jobs that directly descend from the start job UID up to the end job UID. + """ + ... + def find_in_workspace( + self, project_uid: str, workspace_uid: str, /, *, sort: str = "created_at", order: Literal[1, -1] = 1 + ) -> List[Job]: + """ + Finds all jobs in workspace. + """ + ... + def create( + self, + project_uid: str, + workspace_uid: str, + /, + params: Optional[Dict[str, Union[bool, int, float, str, str, None]]] = ..., + *, + type: str, + title: str = "", + description: str = "", + created_by_job_uid: Optional[str] = ..., + enable_bench: bool = False, + ) -> Job: + """ + Creates a new job with the given type in the project/workspace + + To see all available job types and their parameters, see the `GET projects/{project_uid}:register` endpoint + """ + ... + def get_final_results(self, project_uid: str, /) -> GetFinalResultsResponse: + """ + Gets all final results within a project, along with the ancestors and non-ancestors of those jobs. + """ + ... + def find_one(self, project_uid: str, job_uid: str, /) -> Job: + """ + Finds the job. + """ + ... + def delete(self, project_uid: str, job_uid: str, /, *, force: bool = False) -> None: + """ + Deletes a job. Will kill (if running) and clear the job before deleting. + """ + ... + def get_directory(self, project_uid: str, job_uid: str, /) -> str: + """ + Gets the job directory for a given job. + """ + ... + def get_log(self, project_uid: str, job_uid: str, /) -> str: + """ + Returns contents of the job.log file. Returns empty string if job.log does not exist. + """ + ... + def get_log_path(self, project_uid: str, job_uid: str, /) -> str: ... + def get_input_specs(self, project_uid: str, job_uid: str, /) -> InputSpecs: ...
+ def get_input_spec(self, project_uid: str, job_uid: str, input_name: str, /) -> InputSpec: ... + def add_external_input(self, project_uid: str, job_uid: str, input_name: str, /, body: InputSpec) -> Job: + """ + Add or replace an external job's input. + """ + ... + def get_output_specs(self, project_uid: str, job_uid: str, /) -> OutputSpecs: ... + def get_output_fields( + self, project_uid: str, job_uid: str, output_name: str, /, dtype_params: dict = {} + ) -> List[Tuple[str, str]]: + """ + Expected dataset column definitions for given job output, excluding passthroughs. + """ + ... + def get_output_spec(self, project_uid: str, job_uid: str, output_name: str, /) -> OutputSpec: ... + def add_external_output(self, project_uid: str, job_uid: str, output_name: str, /, body: OutputSpec) -> Job: + """ + Add or replace an external job's output. + """ + ... + def create_external_result(self, project_uid: str, workspace_uid: str, /, body: ExternalOutputSpec) -> Job: + """ + Create an external result with the given specification. Returns an external + job with the given output ready to be saved. Used with cryosparc-tools + """ + ... + def get_status(self, project_uid: str, job_uid: str, /) -> JobStatus: + """ + Gets the status of a job. + """ + ... + def view(self, project_uid: str, workspace_uid: str, job_uid: str, /) -> Job: + """ + Adds a project, workspace and job uid to a user's recently viewed jobs list + """ + ... + def set_param(self, project_uid: str, job_uid: str, param: str, /, *, value: Any) -> Job: + """ + Sets the given job parameter to the value + """ + ... + def clear_param(self, project_uid: str, job_uid: str, param: str, /) -> Job: + """ + Resets the given parameter to its default value. + """ + ... + def load_input( + self, + project_uid: str, + job_uid: str, + input_name: str, + /, + *, + force_join: bool = False, + slots: Union[Literal["default", "passthrough", "all"], List[str]] = "default", + ) -> Dataset: + """ + Load job input dataset. 
Raises exception if no inputs are connected. + """ + ... + def load_output( + self, + project_uid: str, + job_uid: str, + output_name: str, + /, + *, + version: Union[int, str] = "F", + slots: Union[Literal["default", "passthrough", "all"], List[str]] = "default", + ) -> Dataset: + """ + Load job output dataset. Raises exception if output is empty or does not exist. + """ + ... + def save_output( + self, + project_uid: str, + job_uid: str, + output_name: str, + /, + dataset: Dataset, + *, + filename: Optional[str] = ..., + version: int = 0, + ) -> Job: + """ + Save job output dataset. Job must be running or waiting. + """ + ... + def connect( + self, project_uid: str, job_uid: str, input_name: str, /, *, source_job_uid: str, source_output_name: str + ) -> Job: + """ + Connects the input slot on the child job to the output group on the + parent job. + """ + ... + def disconnect_all(self, project_uid: str, job_uid: str, input_name: str, /) -> Job: ... + def disconnect(self, project_uid: str, job_uid: str, input_name: str, connection_index: int, /) -> Job: + """ + Removes connected inputs on the given input. + + Optionally specify an index to disconnect a specific connection. + + Optionally provide specific results to disconnect from matching connections (other results will be preserved). + """ + ... + def find_output_result(self, project_uid: str, job_uid: str, output_name: str, result_name: str, /) -> OutputResult: + """ + Get a job's low-level output result. + """ + ... + def connect_result( + self, + project_uid: str, + job_uid: str, + input_name: str, + connection_index: int, + result_name: str, + /, + *, + source_job_uid: str, + source_output_name: str, + source_result_name: str, + ) -> Job: + """ + Adds or replaces a result within an input connection with the given output result from a different job. + """ + ...
+ def disconnect_result( + self, project_uid: str, job_uid: str, input_name: str, connection_index: int, result_name: str, / + ) -> Job: + """ + Removes an output result connected within the given input connection. + """ + ... + def enqueue( + self, + project_uid: str, + job_uid: str, + /, + *, + lane: Optional[str] = ..., + hostname: Optional[str] = ..., + gpus: List[int] = [], + no_check_inputs_ready: bool = False, + ) -> Job: + """ + Adds the job to the queue for the given worker lane (default lane if not specified) + """ + ... + def recalculate_intermediate_results_size(self, project_uid: str, job_uid: str, /) -> Any: + """ + For a job, find intermediate results and recalculate their total size. + """ + ... + def recalculate_project_intermediate_results_size(self, project_uid: str, /) -> Any: + """ + Recalculates intermediate result sizes for all jobs in a project. + """ + ... + def clear_intermediate_results(self, project_uid: str, job_uid: str, /, *, always_keep_final: bool = True) -> Any: + """ + Removes intermediate results from the job + """ + ... + def export_output_results( + self, project_uid: str, job_uid: str, output_name: str, /, result_names: Optional[List[str]] = ... + ) -> str: + """ + Prepares a job's output for import to another project or instance. Creates a folder in the project directory → exports subfolder, + then links the output's associated files there. + Note that the returned .csg file's parent folder must be manually copied with symlinks resolved into the target project folder before importing. + """ + ... + def export(self, project_uid: str, job_uid: str, /) -> Job: + """ + Start export for the job into the project's exports directory + """ + ... + def get_output_result_path( + self, project_uid: str, job_uid: str, output_name: str, result_name: str, /, *, version: Union[int, str] = "F" + ) -> str: + """ + Get the absolute path for a job output's dataset or volume density. + """ + ...
+ def interactive_post( + self, project_uid: str, job_uid: str, /, body: dict, *, endpoint: str, timeout: int = 10 + ) -> Any: + """ + Sends a message to an interactive job. + """ + ... + def mark_running( + self, project_uid: str, job_uid: str, /, *, status: Literal["running", "waiting"] = "running" + ) -> Job: + """ + Indicate that an external job is running or waiting. + """ + ... + def mark_completed(self, project_uid: str, job_uid: str, /) -> Job: + """ + Mark a killed or failed job as completed. + """ + ... + def mark_failed(self, project_uid: str, job_uid: str, /, *, error: Optional[str] = ...) -> Job: + """ + Manually mark a job as failed. + """ + ... + def add_event_log( + self, project_uid: str, job_uid: str, /, text: str, *, type: Literal["text", "warning", "error"] = "text" + ) -> TextEvent: + """ + Add the message to the target job's event log. + """ + ... + def get_event_logs( + self, project_uid: str, job_uid: str, /, *, checkpoint: Optional[int] = ... + ) -> List[Union[Event, CheckpointEvent, TextEvent, ImageEvent, InteractiveEvent]]: + """ + Gets all event logs for a job. + + Note: this may return a lot of documents. + """ + ... + def add_image_log( + self, project_uid: str, job_uid: str, /, images: List[GridFSAsset], *, text: str, flags: List[str] = ["plots"] + ) -> ImageEvent: + """ + Add an image or figure to the target job's event log. + """ + ... + def add_checkpoint(self, project_uid: str, job_uid: str, /, meta: dict) -> CheckpointEvent: + """ + Add a checkpoint to the target job's event log. + """ + ... + def recalculate_size(self, project_uid: str, job_uid: str, /) -> Job: + """ + Recalculates the size of a given job's directory. + """ + ... + def clear(self, project_uid: str, job_uid: str, /, *, force: bool = False) -> Job: + """ + Clears a job to get it back to building state (do not clear params or inputs). + """ + ...
+ def clear_many( + self, + *, + project_uid: Optional[List[str]] = ..., + workspace_uid: Optional[List[str]] = ..., + uid: Optional[List[str]] = ..., + type: Optional[List[str]] = ..., + status: Optional[List[JobStatus]] = ..., + category: Optional[List[Category]] = ..., + created_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + updated_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + queued_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + completed_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + killed_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + started_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + exported_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + deleted: Optional[bool] = False, + ) -> List[Job]: + """ + Clears all jobs that match the query. + """ + ... + def clone( + self, + project_uid: str, + job_uid: str, + /, + *, + workspace_uid: Optional[str] = ..., + created_by_job_uid: Optional[str] = ..., + ) -> Job: + """ + Creates a new job as a clone of the provided job. + """ + ... + def kill(self, project_uid: str, job_uid: str, /) -> Job: + """ + Kills a running job + """ + ... + def set_final_result(self, project_uid: str, job_uid: str, /, *, is_final_result: bool) -> Job: + """ + Marks a job as a final result. A job marked as a final result and its ancestor jobs are protected during data cleanup. + """ + ... + def set_title(self, project_uid: str, job_uid: str, /, *, title: str) -> Job: + """ + Sets job title. + """ + ... + def set_description(self, project_uid: str, job_uid: str, /, description: str) -> Job: + """ + Sets job description. + """ + ... + def set_priority(self, project_uid: str, job_uid: str, /, *, priority: int) -> Job: + """ + Sets job priority + """ + ...
+ def set_cluster_custom_vars(self, project_uid: str, job_uid: str, /, cluster_custom_vars: dict) -> Job: + """ + Sets cluster custom variables for job + """ + ... + def get_active_licenses_count(self) -> int: + """ + Gets number of acquired licenses for running jobs + """ + ... + def get_types(self) -> Any: + """ + Gets list of available job types + """ + ... + def get_categories(self) -> Any: + """ + Gets job types by category + """ + ... + def find_ancestor_uids(self, project_uid: str, job_uid: str, /, *, workspace_uid: Optional[str] = ...) -> List[str]: + """ + Finds all ancestors of a single job and return a list of their UIDs + """ + ... + def find_descendant_uids( + self, project_uid: str, job_uid: str, /, *, workspace_uid: Optional[str] = ... + ) -> List[str]: + """ + Find the list of all job UIDs that this job is an ancestor of based + on its outputs. + """ + ... + def link_to_workspace(self, project_uid: str, job_uid: str, workspace_uid: str, /) -> Job: + """ + Adds a job to a workspace. + """ + ... + def unlink_from_workspace(self, project_uid: str, job_uid: str, workspace_uid: str, /) -> Job: + """ + Removes a job from a workspace. + """ + ... + def move(self, project_uid: str, job_uid: str, /, *, from_workspace_uid: str, to_workspace_uid: str) -> Job: + """ + Moves a job from one workspace to another. + """ + ... + def update_directory_symlinks(self, project_uid: str, job_uid: str, /, *, prefix_cut: str, prefix_new: str) -> int: + """ + Rewrites all symbolic links in the job directory, modifying links prefixed with `prefix_cut` to instead be prefixed with `prefix_new`. + """ + ... + def add_tag(self, project_uid: str, job_uid: str, tag_uid: str, /) -> None: + """ + Tags a job with the given tag. + """ + ... + def remove_tag(self, project_uid: str, job_uid: str, tag_uid: str, /) -> None: + """ + Removes the given tag from a job. + """ + ...
+ def import_job(self, project_uid: str, workspace_uid: str, /, *, exported_job_dir_abs: str) -> Job: + """ + Imports the exported job directory into the project. Exported job + directory must be copied to the target project directory with all its symbolic links resolved. + By convention, the exported job directory should be located in the project directory → exports subfolder + """ + ... + def import_result_group( + self, project_uid: str, workspace_uid: str, /, *, csg_path: str, lane: Optional[str] = ... + ) -> Job: + """ + Creates and enqueues an import result group job. + """ + ... + def star_job(self, project_uid: str, job_uid: str, /) -> Job: + """ + Stars a job for a user + """ + ... + def unstar_job(self, project_uid: str, job_uid: str, /) -> Job: + """ + Unstars a job for a user + """ + ... + +class WorkspacesNamespace(APINamespace): + """ + Methods available in api.workspaces, e.g., api.workspaces.find(...) + """ + def find( + self, + *, + sort: str = "created_at", + order: Literal[1, -1] = 1, + uid: Optional[List[str]] = ..., + project_uid: Optional[List[str]] = ..., + created_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + updated_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + deleted: Optional[bool] = False, + ) -> List[Workspace]: + """ + List all workspaces. Specify a filter to list all workspaces in a specific + project. + + Examples: + + >>> api.workspaces.find(project_uid="P1") + """ + ... + def count( + self, + *, + uid: Optional[List[str]] = ..., + project_uid: Optional[List[str]] = ..., + created_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + updated_at: Optional[Tuple[datetime.datetime, datetime.datetime]] = ..., + deleted: Optional[bool] = False, + ) -> int: + """ + Count all workspaces. Use a query to count workspaces in a specific project. + """ + ... 
+ def find_in_project( + self, project_uid: str, /, *, sort: str = "created_at", order: Literal[1, -1] = 1 + ) -> List[Workspace]: + """ + List all workspaces in a project with an optional filter. + """ + ... + def create( + self, + project_uid: str, + /, + *, + title: str, + description: Optional[str] = ..., + created_by_job_uid: Optional[str] = ..., + ) -> Workspace: + """ + Create a new workspace + """ + ... + def preview_delete(self, project_uid: str, workspace_uid: str, /) -> DeleteWorkspacePreview: + """ + Get a list of jobs that would be removed when the given workspace is deleted. + """ + ... + def find_one(self, project_uid: str, workspace_uid: str, /) -> Workspace: + """ + Find a specific workspace in a project + """ + ... + def delete(self, project_uid: str, workspace_uid: str, /) -> None: + """ + Marks the workspace as "deleted". Deletes jobs that are only linked to this workspace + and no other workspace. + """ + ... + def set_title(self, project_uid: str, workspace_uid: str, /, *, title: str) -> Workspace: + """ + Set title of a workspace + """ + ... + def set_description(self, project_uid: str, workspace_uid: str, /, description: str) -> Workspace: + """ + Set description of a workspace + """ + ... + def view(self, project_uid: str, workspace_uid: str, /) -> Workspace: + """ + Adds a workspace uid to a user's recently viewed workspaces list. + """ + ... + def delete_async(self, project_uid: str, workspace_uid: str, /) -> Any: + """ + Starts the workspace deletion task. Deletes jobs that are only linked to this workspace + and no other workspace. + """ + ... + def add_tag(self, project_uid: str, workspace_uid: str, tag_uid: str, /) -> None: + """ + Tag the given workspace with the given tag. + """ + ... + def remove_tag(self, project_uid: str, workspace_uid: str, tag_uid: str, /) -> None: + """ + Removes a tag from a workspace. + """ + ... 
+ def clear_intermediate_results( + self, project_uid: str, workspace_uid: str, /, *, always_keep_final: bool = False + ) -> Any: + """ + Remove intermediate results from a workspace. + """ + ... + def find_workspace_ancestor_uids( + self, project_uid: str, workspace_uid: str, /, job_uids: List[str] + ) -> WorkspaceAncestorUidsResponse: + """ + Finds ancestors of jobs in the workspace + """ + ... + def find_workspace_descendant_uids( + self, project_uid: str, workspace_uid: str, /, job_uids: List[str] + ) -> WorkspaceDescendantUidsResponse: + """ + Finds descendants of jobs in the workspace + """ + ... + def star_workspace(self, project_uid: str, workspace_uid: str, /) -> Workspace: + """ + Stars a workspace for a given user + """ + ... + def unstar_workspace(self, project_uid: str, workspace_uid: str, /) -> Workspace: + """ + Unstars a workspace for a given user + """ + ... + +class SessionsNamespace(APINamespace): + """ + Methods available in api.sessions, e.g., api.sessions.find(...) + """ + def find(self, *, project_uid: Optional[str] = ...) -> List[Session]: + """ + Lists all sessions (optionally, in a project) + """ + ... + def count(self, *, project_uid: Optional[str]) -> int: + """ + Counts all sessions in a project + """ + ... + def find_one(self, project_uid: str, session_uid: str, /) -> Session: + """ + Finds a session + """ + ... + def delete(self, project_uid: str, session_uid: str, /) -> None: + """ + Sets the session document as "deleted" + Will throw an error if any undeleted jobs exist within the session. + """ + ... + def create( + self, + project_uid: str, + /, + *, + title: str, + description: Optional[str] = ..., + created_by_job_uid: Optional[str] = ..., + ) -> Session: + """ + Creates a new session + """ + ... + def find_exposure_groups(self, project_uid: str, session_uid: str, /) -> List[ExposureGroup]: + """ + Finds all exposure groups in a session. + """ + ...
+ def create_exposure_group(self, project_uid: str, session_uid: str, /) -> ExposureGroup: + """ + Creates an exposure group for a session. + """ + ... + def find_exposure_group(self, project_uid: str, session_uid: str, exposure_group_id: int, /) -> ExposureGroup: + """ + Finds an exposure group with a specific id for a session. + """ + ... + def update_exposure_group( + self, project_uid: str, session_uid: str, exposure_group_id: int, /, body: ExposureGroupUpdate + ) -> ExposureGroup: + """ + Updates properties of an exposure group. + """ + ... + def delete_exposure_group(self, project_uid: str, session_uid: str, exposure_group_id: int, /) -> Session: + """ + Deletes an exposure group from a session. + """ + ... + def finalize_exposure_group(self, project_uid: str, session_uid: str, exposure_group_id: int, /) -> ExposureGroup: + """ + Finalizes an exposure group. + """ + ... + def start(self, project_uid: str, session_uid: str, /) -> Session: + """ + Builds and starts a CryoSPARC Live Session. Builds file engines based on file + engine parameters in the session doc, builds phase one workers based on lane + parameters in the session doc. + """ + ... + def pause(self, project_uid: str, session_uid: str, /) -> Session: + """ + Pauses a CryoSPARC Live Session. Gracefully stops and kills all phase one workers, file engines and phase two jobs + """ + ... + def update_compute_configuration( + self, project_uid: str, session_uid: str, /, body: LiveComputeResources + ) -> LiveComputeResources: + """ + Updates compute configuration for a session. + """ + ... + def add_tag(self, project_uid: str, session_uid: str, tag_uid: str, /) -> None: + """ + Tags a session with the given tag. + """ + ... + def remove_tag(self, project_uid: str, session_uid: str, tag_uid: str, /) -> None: + """ + Removes the given tag from a session. + """ + ... 
+ def update_session_params( + self, + project_uid: str, + session_uid: str, + /, + body: LivePreprocessingParams, + *, + reprocess: bool = True, + priority: int = 1, + ) -> Session: + """ + Updates a session's params. Updates each exposure inside the session with the new stage to start processing at (if there is one). + """ + ... + def update_session_picker( + self, + project_uid: str, + session_uid: str, + /, + *, + activate_picker_type: Literal["blob", "template", "deep"], + use_thresholds: bool = True, + ) -> Session: + """ + Updates a session's picker. + """ + ... + def update_attribute_threshold( + self, + project_uid: str, + session_uid: str, + attribute: str, + /, + *, + min_val: Optional[float] = ..., + max_val: Optional[float] = ..., + ) -> Session: + """ + Updates thresholds for a given attribute. + """ + ... + def clear_session(self, project_uid: str, session_uid: str, /) -> Session: + """ + Deletes all file engine documents (removing all previously known files and + max timestamps), all Phase 1 Worker jobs and all associated + exposure documents. + """ + ... + def view(self, project_uid: str, session_uid: str, /) -> Session: + """ + Adds a project, workspace and job uid to a user's recently viewed sessions list + """ + ... + def setup_phase2_class2D(self, project_uid: str, session_uid: str, /, *, force_restart: bool = True) -> Job: + """ + Setup streaming 2D classification job for a session. + """ + ... + def enqueue_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Job: + """ + Enqueues streaming 2D Classification job for a session + """ + ... + def stop_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Session: + """ + Stops streaming 2D Classification job for a session + """ + ... + def clear_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Session: + """ + Clears streaming 2D Classification job for a session + """ + ... 
+ def update_phase2_class2D_params(self, project_uid: str, session_uid: str, /, body: LiveClass2DParams) -> Session: + """ + Updates streaming 2D Classification job params for session + """ + ... + def invert_template_phase2_class2D(self, project_uid: str, session_uid: str, template_idx: int, /) -> Session: + """ + Inverts selected template for the streaming 2D Classification job of a session + """ + ... + def invert_all_templates_phase2_class2D(self, project_uid: str, session_uid: str, /) -> Session: + """ + Inverts all templates for a session's streaming 2D classification job + """ + ... + def set_all_templates_phase2_class2D( + self, project_uid: str, session_uid: str, direction: Literal["select", "deselect"], / + ) -> Session: + """ + Sets all templates in the session's streaming 2D Classification job + """ + ... + def select_direction_template_phase2_class2D( + self, + project_uid: str, + session_uid: str, + template_idx: int, + /, + *, + dimension: str, + direction: Literal["above", "below"] = "above", + ) -> Session: + """ + Sets all templates above or below an index for a session's streaming 2D Classification + """ + ... + def start_extract_manual(self, project_uid: str, session_uid: str, /) -> None: + """ + Extracts manual picks from a session + """ + ... + def set_session_exposure_processing_priority( + self, + project_uid: str, + session_uid: str, + /, + *, + exposure_processing_priority: Literal["normal", "oldest", "latest", "alternate"], + ) -> Session: + """ + Sets session exposure processing priority + """ + ... + def update_picking_threshold_values( + self, + project_uid: str, + session_uid: str, + picker_type: Literal["blob", "template", "deep"], + /, + *, + ncc_value: float, + power_min_value: float, + power_max_value: float, + ) -> Session: + """ + Updates picking threshold values for a session + """ + ...
+ def reset_attribute_threshold(self, project_uid: str, session_uid: str, attribute: str, /) -> Session: + """ + Resets attribute threshold for a session + """ + ... + def reset_all_attribute_thresholds(self, project_uid: str, session_uid: str, /) -> Session: + """ + Resets all attribute thresholds for a session + """ + ... + def setup_template_creation_class2D( + self, + project_uid: str, + session_uid: str, + /, + *, + num_classes: int, + picker_type: Literal["blob", "template", "manual"], + num_mics: int, + override_particle_diameter_A: Optional[float] = ..., + uid_lte: Optional[int] = ..., + ) -> Session: + """ + Setup template creation class2D job for a session + """ + ... + def set_template_creation_job(self, project_uid: str, session_uid: str, /, *, job_uid: str) -> Session: + """ + Set template creation class2D job for a session + """ + ... + def enqueue_template_creation_class2D(self, project_uid: str, session_uid: str, /) -> Job: + """ + Enqueues template creation class2D job for a session + """ + ... + def clear_template_creation_class2D(self, project_uid: str, session_uid: str, /) -> Session: + """ + Clears template creation class2D job for a session + """ + ... + def toggle_template_creation_template(self, project_uid: str, session_uid: str, template_idx: int, /) -> Session: + """ + Toggles template for template creation job at a specific index for a session's template creation job + """ + ... + def toggle_template_creation_all_templates(self, project_uid: str, session_uid: str, /) -> Session: + """ + Toggles templates for all templates for a session's template creation job + """ + ... + def select_template_creation_all( + self, project_uid: str, session_uid: str, direction: Literal["select", "deselect"], / + ) -> Session: + """ + Selects or deselects all templates for a template creation job in a session + """ + ... 
+ def select_template_creation_in_direction( + self, project_uid: str, session_uid: str, template_idx: int, direction: Literal["above", "below"], / + ) -> Session: + """ + Selects all templates above or below an index in a template creation job for a session + """ + ... + def setup_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Job: + """ + Sets up an Ab-Initio Reconstruction job for a session + """ + ... + def set_phase2_abinit_job(self, project_uid: str, session_uid: str, /, *, job_uid: str) -> Session: + """ + Sets an Ab-Initio Reconstruction job for the session + """ + ... + def enqueue_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Job: + """ + Enqueues Ab-Initio Reconstruction job for a session + """ + ... + def clear_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Session: + """ + Clears Ab-Initio Reconstruction job for a session + """ + ... + def update_phase2_abinit_params(self, project_uid: str, session_uid: str, /, body: LiveAbinitParams) -> Session: + """ + Updates Ab-Initio Reconstruction parameters for the session + """ + ... + def select_phase2_abinit_volume(self, project_uid: str, session_uid: str, /, *, volume_name: str) -> Session: + """ + Selects a volume for an Ab-Initio Reconstruction job in a session + """ + ... + def stop_phase2_abinit(self, project_uid: str, session_uid: str, /) -> Session: + """ + Stops an Ab-Initio Reconstruction job for a session. + """ + ... + def clear_phase2_refine(self, project_uid: str, session_uid: str, /) -> Session: + """ + Clears streaming Homogeneous Refinement job for a session + """ + ... + def setup_phase2_refine(self, project_uid: str, session_uid: str, /) -> Job: ... + def enqueue_phase2_refine(self, project_uid: str, session_uid: str, /) -> Job: + """ + Enqueues a streaming Homogeneous Refinement job for a session + """ + ...
+ def stop_phase2_refine(self, project_uid: str, session_uid: str, /) -> Session: + """ + Stops a streaming Homogeneous Refinement job for a session + """ + ... + def update_phase2_refine_params(self, project_uid: str, session_uid: str, /, body: LiveRefineParams) -> Session: + """ + Updates parameters for a streaming Homogeneous Refinement job for a session + """ + ... + def create_and_enqueue_dump_particles( + self, + project_uid: str, + session_uid: str, + /, + *, + picker_type: Optional[Literal["blob", "template", "manual"]] = ..., + num_mics: Optional[int] = ..., + uid_lte: Optional[int] = ..., + test_only: bool = False, + ) -> Job: + """ + Creates and enqueues a dump particles job for a session + """ + ... + def create_and_enqueue_dump_exposures( + self, project_uid: str, session_uid: str, /, *, export_ignored: bool = False + ) -> Job: + """ + Creates and enqueues a dump exposures job for a session + """ + ... + def get_data_management_stats(self, project_uid: str, /) -> Dict[str, DataManagementStats]: + """ + Gets the data management stats of all sessions in a project. + """ + ... + def mark_session_completed(self, project_uid: str, session_uid: str, /) -> Session: + """ + Marks the session as completed + """ + ... + def change_session_data_management_state( + self, + project_uid: str, + session_uid: str, + /, + *, + datatype: Literal["micrographs", "raw", "particles", "metadata", "thumbnails"], + status: Literal["active", "archiving", "archived", "deleted", "deleting", "missing", "calculating"], + ) -> Session: + """ + Updates data management status of a session's datatype + """ + ... + def update_session_datatype_sizes(self, project_uid: str, session_uid: str, /) -> int: + """ + Updates the session's data_management information with the current size of each datatype. + """ + ...
+ def get_datatype_size( + self, + project_uid: str, + session_uid: str, + datatype: Literal["micrographs", "raw", "particles", "metadata", "thumbnails"], + /, + ) -> int: + """ + Gets the total size of a datatype inside a session in bytes. + """ + ... + def delete_live_datatype( + self, + project_uid: str, + session_uid: str, + datatype: Literal["micrographs", "raw", "particles", "metadata", "thumbnails"], + /, + ) -> Job | None: + """ + Deletes a specific datatype inside a session. + """ + ... + def update_all_sessions_datatype_sizes(self, project_uid: str, /) -> None: + """ + Asynchronously updates the datatype sizes of all sessions within a project + """ + ... + def get_datatype_file_paths( + self, + project_uid: str, + session_uid: str, + datatype: Literal["micrographs", "raw", "particles", "metadata", "thumbnails"], + /, + ) -> List[str]: + """ + Gets all the file paths associated with a specific datatype inside a session as a list + """ + ... + def get_configuration_profiles(self) -> List[SessionConfigProfile]: + """ + Gets all session configuration profiles + """ + ... + def create_configuration_profile(self, body: SessionConfigProfileBody) -> SessionConfigProfile: + """ + Creates a session configuration profile + """ + ... + def apply_configuration_profile(self, project_uid: str, session_uid: str, /, *, configuration_id: str) -> Session: + """ + Applies a configuration profile to a session, overwriting its resources, parameters, and exposure group + """ + ... + def update_configuration_profile( + self, configuration_id: str, /, body: SessionConfigProfileBody + ) -> SessionConfigProfile: + """ + Updates a configuration profile + """ + ... + def delete_configuration_profile(self, configuration_id: str, /) -> None: + """ + Deletes a configuration profile + """ + ... + def compact_session(self, project_uid: str, session_uid: str, /) -> Any: + """ + Compacts a session by clearing each exposure group and its related files for each exposure in the session. 
+ Also clears gridfs data. + """ + ... + def restore_session(self, project_uid: str, session_uid: str, /, body: LiveComputeResources) -> Any: + """ + Restores exposures of a compacted session. Starts phase 1 worker(s) on the specified lane to restore each exposure by re-processing starting from motion correction, skipping the + picking stage. + """ + ... + def get_session_base_params(self) -> Any: + """ + Gets base session parameters + """ + ... + +class ProjectsNamespace(APINamespace): + """ + Methods available in api.projects, e.g., api.projects.check_directory(...) + """ + def check_directory(self, *, path: str) -> str: + """ + Checks if a candidate project directory exists, and if it is readable and writeable. + """ + ... + def get_title_slug(self, *, title: str) -> str: + """ + Returns a slugified version of a project title. + """ + ... + def find( + self, + *, + sort: str = "created_at", + order: int = 1, + uid: Optional[List[str]] = ..., + project_dir: Optional[str] = ..., + owner_user_id: Optional[str] = ..., + deleted: Optional[bool] = False, + archived: Optional[bool] = ..., + detached: Optional[bool] = ..., + hidden: Optional[bool] = ..., + ) -> List[Project]: + """ + Finds projects matching the filter. + """ + ... + def create(self, *, title: str, description: Optional[str] = ..., parent_dir: str) -> Project: + """ + Creates a new project, project directory and creates a new document in + the project collection + """ + ... + def count( + self, + *, + uid: Optional[List[str]] = ..., + project_dir: Optional[str] = ..., + owner_user_id: Optional[str] = ..., + deleted: Optional[bool] = False, + archived: Optional[bool] = ..., + detached: Optional[bool] = ..., + hidden: Optional[bool] = ..., + ) -> int: + """ + Counts the number of projects matching the filter. + """ + ... + def set_title(self, project_uid: str, /, *, title: str) -> Project: + """ + Sets the title of a project. + """ + ... 
+ def set_description(self, project_uid: str, /, description: str) -> Project: + """ + Sets the description of a project. + """ + ... + def view(self, project_uid: str, /) -> Project: + """ + Adds a project uid to a user's recently viewed projects list. + """ + ... + def mkdir(self, project_uid: str, /, *, parents: bool = False, exist_ok: bool = False, path: str = "") -> str: + """ + Create a directory in the project directory at the given path. + """ + ... + def cp(self, project_uid: str, /, *, source: str, path: str = "") -> str: + """ + Copy the source file or directory to the project directory at the given + path. Returns the absolute path of the copied file. + """ + ... + def symlink(self, project_uid: str, /, *, source: str, path: str = "") -> str: + """ + Create a symlink from the source path in the project directory at the given path. + """ + ... + def upload_file(self, project_uid: str, /, stream: Stream, *, overwrite: bool = False, path: str = "") -> str: + """ + Upload a file to the project directory at the given path. Returns absolute + path of the uploaded file. + + Path may be specified as a filename, a relative path inside the project + directory, or a full absolute path. + """ + ... + def download_file(self, project_uid: str, /, *, path: str = "") -> Stream: + """ + Download a file from the project directory at the given path. + """ + ... + def ls(self, project_uid: str, /, *, recursive: bool = False, path: str = "") -> List[str]: + """ + List files in the project directory. Note that enabling recursive will + include parent directories in the result. + """ + ... + def get_job_register(self, project_uid: str, /) -> JobRegister: + """ + Gets the job register model for the project. The same for every project. + """ + ... + def preview_delete(self, project_uid: str, /) -> DeleteProjectPreview: + """ + Retrieves the workspaces and jobs that would be affected when the project is deleted. + """ + ... 
+ def find_one(self, project_uid: str, /) -> Project: + """ + Finds a project by its UID + """ + ... + def delete(self, project_uid: str, /) -> None: + """ + Deletes the project, its full directory, and all associated workspaces, sessions, jobs and results. + """ + ... + def delete_async(self, project_uid: str, /) -> Any: + """ + Starts project deletion task. Will delete the project, its full directory, and all associated workspaces, sessions, jobs and results. + """ + ... + def get_directory(self, project_uid: str, /) -> str: + """ + Gets the project's absolute directory with all environment variables in the + path resolved + """ + ... + def get_owner_id(self, project_uid: str, /) -> str: + """ + Gets the user account ID for the owner of a project. + """ + ... + def set_owner(self, project_uid: str, user_id: str, /) -> Project: + """ + Sets owner of the project to the user + """ + ... + def add_user_access(self, project_uid: str, user_id: str, /) -> Project: + """ + Grants access to another user to view and edit the project. + May only be called by project owners and administrators. + """ + ... + def remove_user_access(self, project_uid: str, user_id: str, /) -> Project: + """ + Removes a user's access from a project. + """ + ... + def refresh_size(self, project_uid: str, /) -> Project: + """ + Walks the project directory and updates the project size with the sum + of all the file sizes. + """ + ... + def refresh_size_async(self, project_uid: str, /) -> Any: + """ + Starts project size recalculation asynchronously. + """ + ... + def get_symlinks(self, project_uid: str, /) -> List[ProjectSymlink]: + """ + Gets all symbolic links in the project directory + """ + ... + def set_default_param(self, project_uid: str, name: str, /, value: Union[bool, int, float, str]) -> Project: + """ + Sets a default value for a parameter name globally for the project + """ + ...
+ def clear_default_param(self, project_uid: str, name: str, /) -> Project: + """ + Clears the per-project default value for a parameter name. + """ + ... + def claim_instance_ownership(self, project_uid: str, /, *, force: bool = False) -> None: ... + def claim_all_instance_ownership(self, *, force: bool = False) -> None: + """ + Claims ownership of all projects in instance. Call when upgrading from an older CryoSPARC version that did not support project locks. + """ + ... + def archive(self, project_uid: str, /) -> Project: + """ + Archives a project. This means that the project can no longer be modified + and jobs cannot be created or run. Once archived, the project directory may + be safely moved to long-term storage. + """ + ... + def unarchive(self, project_uid: str, /, *, path: str) -> Project: + """ + Reverses archive operation. + """ + ... + def detach(self, project_uid: str, /) -> Project: + """ + Detaches a project, removing its lockfile. This hides the project from the interface and allows other + instances to attach and run this project. + """ + ... + def attach(self, *, path: str) -> Project: + """ + Attaches a project directory at a specified path and writes a new + lockfile. Must be run on a project directory without a lockfile. + """ + ... + def move(self, project_uid: str, /, *, path: str) -> Project: + """ + Renames the project directory for a project. Provide either the new + directory name or the full new directory path. + """ + ... + def get_next_exposure_group_id(self, project_uid: str, /) -> int: + """ + Gets next exposure group ID + """ + ... 
+ def cleanup_data( + self, + project_uid: str, + /, + *, + workspace_uid: Optional[str] = ..., + delete_non_final: bool = False, + delete_statuses: List[JobStatus] = [], + clear_non_final: bool = False, + clear_categories: List[Category] = [], + clear_types: List[str] = [], + clear_statuses: List[JobStatus] = [], + ) -> Any: + """ + Cleans up project or workspace data, clearing/deleting jobs based on final result status, categories, types, or job status + """ + ... + def add_tag(self, project_uid: str, tag_uid: str, /) -> None: + """ + Tags a project with the given tag. + """ + ... + def remove_tag(self, project_uid: str, tag_uid: str, /) -> None: + """ + Removes the given tag from a project. + """ + ... + def get_generate_intermediate_results_settings(self, project_uid: str, /) -> GenerateIntermediateResultsSettings: + """ + Gets generate intermediate result settings. + """ + ... + def set_generate_intermediate_results_settings( + self, project_uid: str, /, body: GenerateIntermediateResultsSettings + ) -> Project: + """ + Sets settings for intermediate result generation. + """ + ... + def clear_intermediate_results(self, project_uid: str, /, *, always_keep_final: bool = True) -> Any: + """ + Removes intermediate results from the project. + """ + ... + def get_generate_intermediate_results_job_types(self) -> List[str]: + """ + Gets intermediate result job types + """ + ... + def star_project(self, project_uid: str, /) -> Project: + """ + Stars a project for a user + """ + ... + def unstar_project(self, project_uid: str, /) -> Project: + """ + Unstars a project for a user + """ + ... + +class ExposuresNamespace(APINamespace): + """ + Methods available in api.exposures, e.g., api.exposures.reset_manual_reject_exposures(...) + """ + def reset_manual_reject_exposures(self, project_uid: str, session_uid: str, /) -> List[Exposure]: + """ + Resets manual rejection status on all exposures in a session. + """ + ...
+ def reset_all_exposures(self, project_uid: str, session_uid: str, /) -> None: + """ + Resets all exposures in a session to initial state. + """ + ... + def reset_failed_exposures(self, project_uid: str, session_uid: str, /) -> None: + """ + Resets all failed exposures in a session to initial state. + """ + ... + def reset_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: + """ + Resets exposure to initial state. + """ + ... + def manual_reject_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: + """ + Manually rejects exposure. + """ + ... + def manual_unreject_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: + """ + Manually unrejects exposure. + """ + ... + def toggle_manual_reject_exposure(self, project_uid: str, session_uid: str, exposure_uid: int, /) -> Exposure: + """ + Toggles manual rejection state on exposure. + """ + ... + def reprocess_single_exposure( + self, + project_uid: str, + session_uid: str, + exposure_uid: int, + /, + body: LivePreprocessingParams, + *, + picker_type: Literal["blob", "template"], + ) -> Exposure: + """ + Reprocesses a single micrograph with the passed parameters. If there is a test micrograph + in the session already that is not the same micrograph that the user is currently trying to test, it will be reset + back to the "ctf" stage without the test flag. + """ + ... + def add_manual_pick( + self, project_uid: str, session_uid: str, exposure_uid: int, /, *, x_frac: float, y_frac: float + ) -> Exposure: + """ + Adds a manual pick to an exposure. + """ + ... + def remove_manual_pick( + self, + project_uid: str, + session_uid: str, + exposure_uid: int, + /, + *, + x_frac: float, + y_frac: float, + dist_frac: float = 0.02, + ) -> Exposure: + """ + Removes manual pick from an exposure + """ + ...
+ def get_individual_picks( + self, + project_uid: str, + session_uid: str, + exposure_uid: int, + picker_type: Literal["blob", "template", "manual"], + /, + ) -> List[List[float]]: + """ + Gets list of picks from an exposure + """ + ... + +class TagsNamespace(APINamespace): + """ + Methods available in api.tags, e.g., api.tags.find(...) + """ + def find( + self, + *, + sort: str = "created_at", + order: Literal["1", "-1"] = "1", + created_by_user_id: Optional[str] = ..., + type: Optional[List[Literal["general", "project", "workspace", "session", "job"]]] = ..., + uid: Optional[str] = ..., + ) -> List[Tag]: + """ + Finds tags that match the given query. + """ + ... + def create( + self, + *, + type: Literal["general", "project", "workspace", "session", "job"], + colour: Optional[ + Literal[ + "black", + "gray", + "red", + "orange", + "yellow", + "green", + "teal", + "cyan", + "sky", + "blue", + "indigo", + "purple", + "pink", + ] + ] = ..., + description: Optional[str] = ..., + created_by_workflow: Optional[str] = ..., + title: Optional[str], + ) -> Tag: + """ + Creates a new tag + """ + ... + def update( + self, + tag_uid: str, + /, + *, + colour: Optional[ + Literal[ + "black", + "gray", + "red", + "orange", + "yellow", + "green", + "teal", + "cyan", + "sky", + "blue", + "indigo", + "purple", + "pink", + ] + ] = ..., + description: Optional[str] = ..., + title: Optional[str], + ) -> Tag: + """ + Updates the title, colour and/or description of the given tag UID + """ + ... + def delete(self, tag_uid: str, /) -> None: + """ + Deletes a given tag + """ + ... + def get_tags_by_type(self) -> Dict[str, List[Tag]]: + """ + Gets all tags as a dictionary, where the types are the keys + """ + ... + def get_tag_count_by_type(self) -> Dict[str, int]: + """ + Gets a count of all tags by type + """ + ... + +class NotificationsNamespace(APINamespace): + """ + Methods available in api.notifications, e.g., api.notifications.deactivate_notification(...) 
+ """ + def deactivate_notification(self, notification_id: str, /) -> Notification: + """ + Deactivates a notification + """ + ... + +class BlueprintsNamespace(APINamespace): + """ + Methods available in api.blueprints, e.g., api.blueprints.create_blueprint(...) + """ + def create_blueprint( + self, schema: dict, *, blueprint_id: str, imported: bool, project_uid: str, job_uid: str, job_type: str + ) -> None: + """ + For cryosparc app only + """ + ... + def edit_blueprint( + self, blueprint_id: str, /, schema: dict, *, project_uid: str, job_uid: str, job_type: str + ) -> None: + """ + For cryosparc app only + """ + ... + def delete_blueprint(self, blueprint_id: str, /, *, job_type: str) -> None: + """ + For cryosparc app only + """ + ... + def apply_blueprint( + self, blueprint_id: str, /, schema: dict, *, project_uid: str, job_uid: str, job_type: str + ) -> None: + """ + For cryosparc app only + """ + ... + +class WorkflowsNamespace(APINamespace): + """ + Methods available in api.workflows, e.g., api.workflows.create_workflow(...) + """ + def create_workflow( + self, schema: dict, *, workflow_id: str, forked: bool = False, imported: bool = False, rebuilt: bool = False + ) -> None: + """ + For cryosparc app only + """ + ... + def edit_workflow(self, workflow_id: str, /, schema: dict) -> None: + """ + For cryosparc app only + """ + ... + def delete_workflow(self, workflow_id: str, /) -> None: + """ + For cryosparc app only + """ + ... + def apply_workflow(self, workflow_id: str, /, schema: dict) -> None: + """ + For cryosparc app only + """ + ... + +class ExternalNamespace(APINamespace): + """ + Methods available in api.external, e.g., api.external.get_empiar_latest_entries(...) + """ + def get_empiar_latest_entries(self) -> dict: ... + def get_emdb_latest_entries(self) -> List[dict]: ... + def get_discuss_top(self) -> dict: ... + def get_discuss_categories(self) -> dict: ... + def get_tutorials(self) -> dict: ... + def get_changelog(self) -> dict: ... 
+ +class DeveloperNamespace(APINamespace): + """ + Methods available in api.developer, e.g., api.developer.get_developers(...) + """ + def get_developers(self) -> List[str]: ... + def reload(self) -> bool: + """ + Restarts API service and scheduler. + """ + ... + def save_job_registers(self) -> List[JobRegister]: + """ + Re-saves the current job registers. Call this when restarting the api + service without executing the /startup route, as we do during developer + reloads. + """ + ... + +class APIClient: + """ + Top-level API client class. e.g., ``api.read_root(...)`` + or ``api.config.get_instance_uid(...)`` + """ + + config: ConfigNamespace + instance: InstanceNamespace + cache: CacheNamespace + users: UsersNamespace + resources: ResourcesNamespace + assets: AssetsNamespace + jobs: JobsNamespace + workspaces: WorkspacesNamespace + sessions: SessionsNamespace + projects: ProjectsNamespace + exposures: ExposuresNamespace + tags: TagsNamespace + notifications: NotificationsNamespace + blueprints: BlueprintsNamespace + workflows: WorkflowsNamespace + external: ExternalNamespace + developer: DeveloperNamespace + + def __init__( + self, + base_url: Optional[str] = None, + *, + auth: Union[str, tuple[str, str], None] = None, + headers: Optional[Dict[str, str]] = None, + timeout: float = ..., + ) -> None: ... + def __call__(self, *, auth: Union[str, tuple[str, str], None] = None) -> Any: ... + def read_root(self) -> Hello: ... + def health(self) -> str: ... + def login( + self, + *, + username: str, + password: str, + grant_type: Optional[str] = ..., + scope: str = "", + client_id: Optional[str] = ..., + client_secret: Optional[str] = ..., + ) -> Token: + """ + Login form. Note that plain-text passwords are not accepted; they must be + hashed as SHA256. + """ + ... + def keycloak_login(self, *, keycloak_access_token: str) -> Token: ... + def verify_app_session(self, body: AppSession) -> str: ... 
+ def job_register(self) -> JobRegister: + """ + Get a specification of available job types and their schemas. + """ + ... + def start_and_migrate(self, *, license_id: str) -> Any: + """ + Start up CryoSPARC for the first time and perform database migrations + """ + ... + def test(self, delay: float, /) -> str: + """ + Sleep for the specified number of seconds and returns a value to indicate + endpoint is working correctly. + """ + ... diff --git a/cryosparc/command.py b/cryosparc/command.py deleted file mode 100644 index 7d25a6e8..00000000 --- a/cryosparc/command.py +++ /dev/null @@ -1,271 +0,0 @@ -""" -Provides classes and functions for communicating with CryoSPARC's command -servers. Generally should not be used directly. -""" - -import json -import os -import socket -import time -import uuid -from contextlib import contextmanager -from typing import Optional, Type -from urllib.error import HTTPError, URLError -from urllib.parse import urlencode -from urllib.request import Request, urlopen -from warnings import warn - -from .errors import CommandError - -MAX_ATTEMPTS = int(os.getenv("CRYOSPARC_COMMAND_RETRIES", 3)) -RETRY_INTERVAL = int(os.getenv("CRYOSPARC_COMMAND_RETRY_SECONDS", 30)) - - -class CommandClient: - """ - Class for communicating with CryoSPARC's ``command_core``, - ``command_vis`` and ``command_rtp`` HTTP services. - - Upon initialization, gets a list of available JSONRPC_ endpoints and creates - corresponding instance methods for each one. Reference of available methods - for the ``command_core`` service (a.k.a. "cli") is available in the - `CryoSPARC Guide`_. - - Args: - service (str, optional): Label for CryoSPARC Command service that this - instance connects to and communicates with, e.g., ``command_core``, - ``command_vis`` or ``command_rtp`` - host (str, optional): Domain name or IP address of CryoSPARC master. - Defaults to "localhost". - port (int, optional): Command server base port. Defaults to 39002. 
- url (str, optional): Base URL path prefix for all requests (e.g., "/v1"). - Defaults to "". - timeout (int, optional): How long to wait for a request to complete - before timing out, in seconds. Defaults to 300. - headers (dict, optional): Default HTTP headers to send with every - request. Defaults to {}. - cls (Type[JSONEncoder], optional): Class to handle JSON encoding of - special Python objects, such as numpy arrays. Defaults to None. - - Attributes: - - service (str): label of CryoSPARC Command service this instance connects to - and communicates with - - Examples: - - Connect to ``command_core`` - - >>> from cryosparc.command import CommandClient - >>> cli = CommandClient( - ... host="csmaster", - ... port=39002, - ... headers={"License-ID": "xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx"} - ... ) - - Queue a job - - >>> cli.enqueue_job(project_uid="P3", job_uid="J42", lane="csworker") - "launched" - - .. _JSONRPC: - https://www.jsonrpc.org - - .. _CryoSPARC Guide: - https://guide.cryosparc.com/setup-configuration-and-management/management-and-monitoring/cli - - """ - - Error = CommandError - service: str - - def __init__( - self, - service: str = "command", - host: str = "localhost", - port: int = 39002, - url: str = "", - timeout: int = 300, - headers: dict = {}, - cls: Optional[Type[json.JSONEncoder]] = None, - ): - self.service = service - self._url = f"http://{host}:{port}{url}" - self._cls = cls - self._timeout = timeout - self._headers = headers - self._reload() # attempt connection immediately to gather methods - - def _get_callable(self, key): - def func(*args, **kwargs): - params = kwargs if len(kwargs) else args - data = {"jsonrpc": "2.0", "method": key, "params": params, "id": str(uuid.uuid4())} - res = None - try: - with make_json_request(self, "/api", data=data, _stacklevel=4) as request: - res = json.loads(request.read()) - except CommandError as err: - raise CommandError( - f'Encounted error from JSONRPC function "{key}" with params {params}', - 
url=self._url, - code=err.code, - data=err.data, - ) from err - - if not res: - raise CommandError( - f'JSON response not received from JSONRPC function "{key}" with params {params}', - url=self._url, - ) - elif "error" in res: - error = res["error"] - raise CommandError( - f'Encountered {error.get("name", "Error")} from JSONRPC function "{key}" with params {params}:\n' - f"{format_server_error(error)}", - url=self._url, - code=error.get("code"), - data=error.get("data"), - ) - else: - return res["result"] # OK - - return func - - def _reload(self): - system = self._get_callable("system.describe")() - self._endpoints = [p["name"] for p in system["procs"]] - for key in self._endpoints: - setattr(self, key, self._get_callable(key)) - - def __call__(self): - self._reload() - - -@contextmanager -def make_request( - client: CommandClient, - method: str = "POST", - url: str = "", - *, - query: dict = {}, - data=None, - headers: dict = {}, - _stacklevel=2, # controls warning line number -): - """ - Create a raw HTTP request/response context with the given command client. - - Args: - client (CommandClient): command client instance - method (str, optional): HTTP method. Defaults to "POST". - url (str, optional): URL to append to the client's initialized URL. Defaults to "". - query (dict, optional): Query string parameters. Defaults to {}. - data (any, optional): Request body data. Usually in binary. Defaults to None. - headers (dict, optional): HTTP headers. Defaults to {}. - - Raises: - CommandError: General error such as timeout, URL or HTTP - - Yields: - http.client.HTTPResponse: Use with a context manager to get HTTP response - - Example: - - >>> from cryosparc.command import CommandClient, make_request - >>> cli = CommandClient() - >>> with make_request(cli, url="/download_file", query={'path': '/file.txt'}) as response: - ... data = response.read() - - """ - url = f"{client._url}{url}{'?' 
+ urlencode(query) if query else ''}" - headers = {"Originator": "client", **client._headers, **headers} - attempt = 1 - error_reason = "" - code = 500 - resdata = None - while attempt < MAX_ATTEMPTS: - request = Request(url, data=data, headers=headers, method=method) - response = None - try: - with urlopen(request, timeout=client._timeout) as response: - yield response - return - except HTTPError as error: # command server reported an error - code = error.code - error_reason = ( - f"HTTP Error {error.code} {error.reason}; " - f"please check cryosparcm log {client.service} for additional information." - ) - if error.readable(): - resdata = error.read() - error_reason += f"\nResponse from server: {resdata}" - if resdata and error.headers.get_content_type() == "application/json": - resdata = json.loads(resdata) - - warn(f"*** {type(client).__name__}: ({url}) {error_reason}", stacklevel=_stacklevel) - break - except URLError as error: # command server may be down - error_reason = f"URL Error {error.reason}" - warn( - f"*** {type(client).__name__}: ({url}) {error_reason}, attempt {attempt} of {MAX_ATTEMPTS}. " - f"Retrying in {RETRY_INTERVAL} seconds", - stacklevel=_stacklevel, - ) - time.sleep(RETRY_INTERVAL) - attempt += 1 - except (TimeoutError, socket.timeout): # slow network connection or request - error_reason = "Timeout Error" - warn( - f"*** {type(client).__name__}: command ({url}) " - f"did not reply within timeout of {client._timeout} seconds, " - f"attempt {attempt} of {MAX_ATTEMPTS}", - stacklevel=_stacklevel, - ) - attempt += 1 - - raise CommandError(error_reason, url=url, code=code, data=resdata) - - -def make_json_request(client: CommandClient, url="", *, query={}, data=None, headers={}, _stacklevel=3): - """ - Similar to ``make_request``, except sends request body data JSON and - receives arbitrary response. - - Args: - client (CommandClient): command client instance - url (str, optional): URL path to append to the client's initialized root - URL. 
Defaults to "". - query (dict, optional): Query string parameters. Defaults to {}. - data (any, optional): JSON-encodable request body. Defaults to None. - headers (dict, optional): HTTP headers. Defaults to {}. - - Yields: - http.client.HTTPResponse: Use with a context manager to get HTTP response - - Raises: - CommandError: General error such as timeout, URL or HTTP - - Example: - - >>> from cryosparc.command import CommandClient, make_json_request - >>> cli = CommandClient() - >>> with make_json_request(cli, url="/download_file", data={'path': '/file.txt'}) as response: - ... data = response.read() - - """ - headers = {"Content-Type": "application/json", **headers} - data = json.dumps(data, cls=client._cls).encode() - return make_request(client, url=url, query=query, data=data, headers=headers, _stacklevel=_stacklevel) - - -def format_server_error(error): - """ - :meta private: - """ - err = error["message"] if "message" in error else str(error) - if "data" in error and error["data"]: - if isinstance(error["data"], dict) and "traceback" in error["data"]: - err += "\n" + error["data"]["traceback"] - else: - err += "\n" + str(error["data"]) - return err diff --git a/cryosparc/constants.py b/cryosparc/constants.py new file mode 100644 index 00000000..5c5c5e30 --- /dev/null +++ b/cryosparc/constants.py @@ -0,0 +1,9 @@ +ONE_MIB = 2**20 +""" +Bytes in 1 mebibyte +""" + +EIGHT_MIB = 2**23 +""" +Bytes in 8 mebibytes +""" diff --git a/cryosparc/controllers/__init__.py b/cryosparc/controllers/__init__.py new file mode 100644 index 00000000..817d2e50 --- /dev/null +++ b/cryosparc/controllers/__init__.py @@ -0,0 +1,82 @@ +""" +Core base classes and utilities for other cryosparc-tools modules. +""" + +# NOTE: This file should only include utilities required only by cryosparc-tools +# CryoSPARC should not depend on anything in this file. 
class Controller(ABC, Generic[M]):
    """
    Abstract base class for Project, Workspace, Job classes and any other types
    that have underlying Mongo database documents.

    Generic type argument ``M`` is the API model class (a pydantic
    ``BaseModel`` subclass) that represents the underlying document.

    :meta private:
    """

    # Cached model instance; stays ``None`` until first assigned or refreshed.
    _model: Optional[M] = None

    @property
    def model(self) -> M:
        """
        Representation of entity data. Contents may change in CryoSPARC
        over time, use :py:meth:`refresh` to update.
        """
        if self._model is None:
            # Nothing cached yet: load lazily on first access.
            self.refresh()
        assert self._model is not None, "Could not refresh database document"
        return self._model

    @model.setter
    def model(self, model: M):
        self._model = model

    @model.deleter
    def model(self):
        # Drop the cached model so the next access triggers a refresh.
        self._model = None

    @property
    def doc(self) -> Dict[str, Any]:
        """
        Deprecated dictionary form of :py:attr:`model`, dumped with field
        aliases. Emits a ``DeprecationWarning`` on every access; use
        :py:attr:`model` instead.
        """
        warnings.warn(".doc attribute is deprecated. Use .model attribute instead.", DeprecationWarning, stacklevel=2)
        return self.model.model_dump(by_alias=True)

    @abstractmethod
    def refresh(self):
        """
        Reload the underlying model. Must be implemented in subclasses, which
        are expected to assign :py:attr:`model` (the ``model`` getter asserts
        that a model is set after calling this method).

        Returns:
            Controller: self
        """
        return self
def as_output_slot(spec: Union[SlotSpec, OutputSlot]) -> OutputSlot:
    """
    Normalize an output slot specification into an ``OutputSlot``.

    A plain string becomes a slot whose name and dtype are both that string.
    A dictionary with a ``"dtype"`` key becomes a slot named after its
    ``"name"`` key, falling back to the ``"prefix"`` key and finally to the
    dtype itself. Any other value is returned unchanged.
    """
    if isinstance(spec, str):
        return OutputSlot(name=spec, dtype=spec)
    if not (isinstance(spec, dict) and "dtype" in spec):
        # Not an expandable spec; hand it back untouched.
        return spec

    dtype = spec["dtype"]
    # Use the first non-empty naming key, in priority order.
    for key in ("name", "prefix"):
        candidate = spec.get(key)
        if candidate:
            return OutputSlot(name=candidate, dtype=dtype)
    return OutputSlot(name=dtype, dtype=dtype)
ASSET_CONTENT_TYPES, IMAGE_CONTENT_TYPES, TEXT_CONTENT_TYPES, - AssetDetails, AssetFormat, Datatype, - EventLogAsset, ImageFormat, - JobDocument, - JobStatus, - MongoController, + LoadableSlots, SlotSpec, TextFormat, ) -from .util import bopen, first, print_table +from ..stream import Stream +from ..util import first, print_table +from . import Controller, as_input_slot, as_output_slot if TYPE_CHECKING: from numpy.typing import ArrayLike, NDArray - from .tools import CryoSPARC + from ..tools import CryoSPARC GROUP_NAME_PATTERN = r"^[A-Za-z][0-9A-Za-z_]*$" @@ -47,17 +57,20 @@ """ -class Job(MongoController[JobDocument]): +class JobController(Controller[Job]): """ Accessor class to a job in CryoSPARC with ability to load inputs and - outputs, add to job log, download job files. Should be instantiated - through `CryoSPARC.find_job`_ or `Project.find_job`_. + outputs, add to job log, download job files. Should be created with + :py:meth:`cs.find_job() ` or + :py:meth:`project.find_job() `. + + Arguments: + job (tuple[str, str] | Job): either _(Project UID, Job UID)_ tuple or + Job model, e.g. ``("P3", "J42")`` Attributes: - uid (str): Job unique ID, e.g., "J42" - project_uid (str): Project unique ID, e.g., "P3" - doc (JobDocument): All job data from the CryoSPARC database. Database - contents may change over time, use the `refresh`_ method to update. + model (Workspace): All job data from the CryoSPARC database. + Contents may change over time, use :py:method:`refresh` to update. Examples: @@ -83,44 +96,60 @@ class Job(MongoController[JobDocument]): >>> job.queue() >>> job.status "queued" + """ - .. _CryoSPARC.find_job: - tools.html#cryosparc.tools.CryoSPARC.find_job - - .. _Project.find_job: - project.html#cryosparc.project.Project.find_job - - .. 
@property
def full_spec(self):
    """
    The full specification for job inputs, outputs and parameters, as
    defined in the job register.

    Raises:
        RuntimeError: If the job register has no specification whose type
            matches this job's type.
    """
    # Read the job type once. Inside this method a bare ``type`` would be the
    # builtin, not this job's type, so it must not be used in the message.
    job_type = self.type
    spec = first(spec for spec in self.cs.job_register.specs if spec.type == job_type)
    if not spec:
        raise RuntimeError(f"Could not find job specification for type {job_type}")
    return spec
Optionally specify specific GPUs indexes to use for computation. Available hostnames for a given lane may be queried with - `CryoSPARC.get_targets`_. + `:py:meth:`cs.get_targets() `. Args: lane (str, optional): Configuried compute lane to queue to. Leave @@ -173,36 +202,16 @@ def queue( >>> job.queue("worker") >>> job.status "queued" - - .. _CryoSPARC.get_lanes: - tools.html#cryosparc.tools.CryoSPARC.get_lanes - .. _CryoSPARC.get_targets: - tools.html#cryosparc.tools.CryoSPARC.get_targets """ if cluster_vars: - self.cs.cli.set_cluster_job_custom_vars( # type: ignore - project_uid=self.project_uid, - job_uid=self.uid, - cluster_job_custom_vars=cluster_vars, - ) - self.cs.cli.enqueue_job( # type: ignore - project_uid=self.project_uid, - job_uid=self.uid, - lane=lane, - user_id=self.cs.user_id, - hostname=hostname, - gpus=gpus if gpus else False, - ) - self.refresh() + self.cs.api.jobs.set_cluster_custom_vars(self.project_uid, self.uid, cluster_vars) + self.model = self.cs.api.jobs.enqueue(self.project_uid, self.uid, lane=lane, hostname=hostname, gpus=gpus) def kill(self): """ Kill this job. 
""" - self.cs.cli.kill_job( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, killed_by_user_id=self.cs.user_id - ) - self.refresh() + self.model = self.cs.api.jobs.kill(self.project_uid, self.uid) def wait_for_status(self, status: Union[JobStatus, Iterable[JobStatus]], *, timeout: Optional[int] = None) -> str: """ @@ -232,12 +241,10 @@ def wait_for_status(self, status: Union[JobStatus, Iterable[JobStatus]], *, time """ statuses = {status} if isinstance(status, str) else set(status) tic = time() - self.refresh() - while self.status not in statuses: + while self.refresh().status not in statuses: if timeout is not None and time() - tic > timeout: break sleep(5) - self.refresh() return self.status def wait_for_done(self, *, error_on_incomplete: bool = False, timeout: Optional[int] = None) -> str: @@ -272,8 +279,8 @@ def interact(self, action: str, body: Any = {}, *, timeout: int = 10, refresh: b refresh (bool, optional): If True, refresh the job document after posting. Defaults to False. """ - result: Any = self.cs.cli.interactive_post( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, endpoint=action, body=body, timeout=timeout + result: Any = self.cs.api.jobs.interactive_post( + self.project_uid, self.uid, body=body, endpoint=action, timeout=timeout ) if refresh: self.refresh() @@ -283,10 +290,9 @@ def clear(self): """ Clear this job and reset to building status. """ - self.cs.cli.clear_job(self.project_uid, self.uid) # type: ignore - self.refresh() + self.model = self.cs.api.jobs.clear(self.project_uid, self.uid) - def set_param(self, name: str, value: Any, *, refresh: bool = True) -> bool: + def set_param(self, name: str, value: Any, **kwargs) -> bool: """ Set the given param name on the current job to the given value. Only works if the job is in "building" status. 
@@ -294,8 +300,6 @@ def set_param(self, name: str, value: Any, *, refresh: bool = True) -> bool: Args: name (str): Param name, as defined in the job document's ``params_base``. value (any): Target parameter value. - refresh (bool, optional): Auto-refresh job document after - connecting. Defaults to True. Returns: bool: False if the job encountered a build error. @@ -309,14 +313,12 @@ def set_param(self, name: str, value: Any, *, refresh: bool = True) -> bool: >>> job.set_param("compute_num_gpus", 4) True """ - result: bool = self.cs.cli.job_set_param( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, param_name=name, param_new_value=value - ) - if refresh: - self.refresh() - return result + if "refresh" in kwargs: + warnings.warn("refresh argument no longer applies", DeprecationWarning, stacklevel=2) + self.model = self.cs.api.jobs.set_param(self.project_uid, self.uid, name, value=value) + return True - def connect(self, target_input: str, source_job_uid: str, source_output: str, *, refresh: bool = True) -> bool: + def connect(self, target_input: str, source_job_uid: str, source_output: str, **kwargs) -> bool: """ Connect the given input for this job to an output with given job UID and name. @@ -327,8 +329,6 @@ def connect(self, target_input: str, source_job_uid: str, source_output: str, *, source_job_uid (str): Job UID to connect from, e.g., "J42" source_output (str): Job output name to connect from , e.g., "particles" - refresh (bool, optional): Auto-refresh job document after - connecting. Defaults to True. Returns: bool: False if the job encountered a build error. 
@@ -344,17 +344,16 @@ def connect(self, target_input: str, source_job_uid: str, source_output: str, *, >>> job.connect("input_micrographs", "J2", "micrographs") """ - assert source_job_uid != self.uid, f"Cannot connect job {self.uid} to itself" - result: bool = self.cs.cli.job_connect_group( # type: ignore - project_uid=self.project_uid, - source_group=f"{source_job_uid}.{source_output}", - dest_group=f"{self.uid}.{target_input}", + if "refresh" in kwargs: + warnings.warn("refresh argument no longer applies", DeprecationWarning, stacklevel=2) + if source_job_uid == self.uid: + raise ValueError(f"Cannot connect job {self.uid} to itself") + self.model = self.cs.api.jobs.connect( + self.project_uid, self.uid, target_input, source_job_uid=source_job_uid, source_output_name=source_output ) - if refresh: - self.refresh() - return result + return True - def disconnect(self, target_input: str, connection_idx: Optional[int] = None, *, refresh: bool = True): + def disconnect(self, target_input: str, connection_idx: Optional[int] = None, **kwargs): """ Clear the given job input group. @@ -363,39 +362,26 @@ def disconnect(self, target_input: str, connection_idx: Optional[int] = None, *, connection_idx (int, optional): Connection index to clear. Set to 0 to clear the first connection, 1 for the second, etc. If unspecified, clears all connections. Defaults to None. - refresh (bool, optional): Auto-refresh job document after - connecting. Defaults to True. 
- """ - if connection_idx is None: - # Clear all input connections - input_group = first(group for group in self.doc["input_slot_groups"] if group["name"] == target_input) - if not input_group: - raise ValueError(f"Unknown input group {target_input} for job {self.project_uid}-{self.uid}") - for _ in input_group["connections"]: - self.cs.cli.job_connected_group_clear( # type: ignore - project_uid=self.project_uid, - dest_group=f"{self.uid}.{target_input}", - connect_idx=0, - ) - else: - self.cs.cli.job_connected_group_clear( # type: ignore - project_uid=self.project_uid, - dest_group=f"{self.uid}.{target_input}", - connect_idx=connection_idx, - ) + """ + if "refresh" in kwargs: + warnings.warn("refresh argument no longer applies", DeprecationWarning, stacklevel=2) - if refresh: - self.refresh() + if connection_idx is None: # Clear all input connections + self.model = self.cs.api.jobs.disconnect_all(self.project_uid, self.uid, target_input) + else: + self.model = self.cs.api.jobs.disconnect(self.project_uid, self.uid, target_input, connection_idx) - def load_input(self, name: str, slots: Iterable[str] = []): + def load_input(self, name: str, slots: LoadableSlots = "all"): """ Load the dataset connected to the job's input with the given name. Args: name (str): Input to load - fields (list[str], optional): List of specific slots to load, such - as ``movie_blob`` or ``locations``, or all slots if not - specified. Defaults to []. + slots (Literal["default", "passthrough", "all"] | list[str], optional): + List of specific slots to load, such as ``movie_blob`` or + ``locations``, or all slots if not specified (including + passthrough). May also specify as keyword. Defaults to + "all". 
Raises: TypeError: If the job doesn't have the given input or the dataset @@ -404,27 +390,19 @@ def load_input(self, name: str, slots: Iterable[str] = []): Returns: Dataset: Loaded dataset """ - job = self.doc - group = first(s for s in job["input_slot_groups"] if s["name"] == name) - if not group: - raise TypeError(f"Job {self.project_uid}-{self.uid} does not have an input {name}") - - data = {"project_uid": self.project_uid, "job_uid": self.uid, "input_name": name, "slots": list(slots)} - with make_json_request(self.cs.vis, "/load_job_input", data=data) as response: - mime = response.headers.get("Content-Type") - if mime != "application/x-cryosparc-dataset": - raise TypeError(f"Unable to load dataset for job {self.project_uid}-{self.uid} input {name}") - return Dataset.load(response) + return self.cs.api.jobs.load_input(self.project_uid, self.uid, name, slots=slots) - def load_output(self, name: str, slots: Iterable[str] = [], version: Union[int, Literal["F"]] = "F"): + def load_output(self, name: str, slots: LoadableSlots = "all", version: Union[int, Literal["F"]] = "F"): """ Load the dataset for the job's output with the given name. Args: name (str): Output to load - slots (list[str], optional): List of specific slots to load, - such as ``movie_blob`` or ``locations``, or all slots if - not specified (including passthrough). Defaults to []. + slots (Literal["default", "passthrough", "all"] | list[str], optional): + List of specific slots to load, such as ``movie_blob`` or + ``locations``, or all slots if not specified (including + passthrough). May also specify as keyword. Defaults to + "all". version (int | Literal["F"], optional): Specific output version to load. Use this to load the output at different stages of processing. Leave unspecified to load final verion. 
Defaults to @@ -436,38 +414,7 @@ def load_output(self, name: str, slots: Iterable[str] = [], version: Union[int, Returns: Dataset: Loaded dataset """ - job = self.doc - slots = set(slots) - version = -1 if version == "F" else version - results = [ - result - for result in job["output_results"] - if result["group_name"] == name and (not slots or result["name"] in slots) - ] - if not slots: - # Requested all slots, but auto-filter results with no provided meta - # files - results = [result for result in results if result["metafiles"]] - if not results: - raise TypeError(f"Job {self.project_uid}-{self.uid} does not have any results for output {name}") - - metafiles = [] - for r in results: - if r["metafiles"]: - metafile = r["metafiles"][0 if r["passthrough"] else version] - if metafile not in metafiles: - metafiles.append(metafile) - else: - raise ValueError( - ( - f"Cannot load output {name} slot {r['name']} because " - "output does not have an associated dataset file. " - "Please exclude this output from the requested slots." 
- ) - ) - - datasets = [self.cs.download_dataset(self.project_uid, f) for f in metafiles] - return Dataset.innerjoin(*datasets) + return self.cs.api.jobs.load_output(self.project_uid, self.uid, name, slots=slots, version=version) def log(self, text: str, level: Literal["text", "warning", "error"] = "text"): """ @@ -481,9 +428,8 @@ def log(self, text: str, level: Literal["text", "warning", "error"] = "text"): Returns: str: Created log event ID """ - return self.cs.cli.job_send_streamlog( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, message=text, error=level != "text" - ) + event = self.cs.api.jobs.add_event_log(self.project_uid, self.uid, text, type=level) + return event.id def log_checkpoint(self, meta: dict = {}): """ @@ -495,9 +441,8 @@ def log_checkpoint(self, meta: dict = {}): Returns: str: Created checkpoint event ID """ - return self.cs.cli.job_checkpoint_streamlog( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, meta=meta - ) + event = self.cs.api.jobs.add_checkpoint(self.project_uid, self.uid, meta) + return event.id def log_plot( self, @@ -564,10 +509,8 @@ def log_plot( raw_data_format=raw_data_format, savefig_kw=savefig_kw, ) - - return self.cs.cli.job_send_streamlog( # type: ignore - project_uid=self.project_uid, job_uid=self.uid, message=text, flags=flags, imgfiles=imgfiles - ) + event = self.cs.api.jobs.add_image_log(self.project_uid, self.uid, imgfiles, text=text, flags=flags) + return event.id def list_files(self, prefix: Union[str, PurePosixPath] = "", recursive: bool = False) -> List[str]: """ @@ -657,7 +600,7 @@ def download_mrc(self, path: Union[str, PurePosixPath]): path = PurePosixPath(self.uid) / path return self.cs.download_mrc(self.project_uid, path) - def list_assets(self) -> List[AssetDetails]: + def list_assets(self) -> List[GridFSFile]: """ Get a list of files available in the database for this job. Returns a list with details about the assets. 
Each entry is a dict with a ``_id`` @@ -665,9 +608,9 @@ def list_assets(self) -> List[AssetDetails]: method. Returns: - list[AssetDetails]: Asset details + list[GridFSFile]: Asset details """ - return self.cs.vis.list_job_files(project_uid=self.project_uid, job_uid=self.uid) # type: ignore + return self.cs.list_assets(self.project_uid, self.uid) def download_asset(self, fileid: str, target: Union[str, PurePath, IO[bytes]]): """ @@ -676,11 +619,11 @@ def download_asset(self, fileid: str, target: Union[str, PurePath, IO[bytes]]): Args: fileid (str): GridFS file object ID - target (str | Path | IO): Local file path, directory path or - writeable file handle to write response data. + target (str | Path | IO): Local file path or writeable file handle + to write response data. Returns: - Path | IO: resulting target path or file handle. + str | Path | IO: resulting target path or file handle. """ return self.cs.download_asset(fileid, target) @@ -712,7 +655,7 @@ def upload_asset( file: Union[str, PurePath, IO[bytes]], filename: Optional[str] = None, format: Optional[AssetFormat] = None, - ) -> EventLogAsset: + ) -> GridFSAsset: """ Upload an image or text file to the current job. Specify either an image (PNG, JPG, GIF, PDF, SVG), text file (TXT, CSV, JSON, XML) or a binary @@ -742,33 +685,20 @@ def upload_asset( Returns: EventLogAsset: Dictionary including details about uploaded asset. 
""" + ext = None if format: - assert format in ASSET_CONTENT_TYPES, f"Invalid asset format {format}" + ext = format elif filename: - ext = filename.split(".")[-1] - assert ext in ASSET_CONTENT_TYPES, f"Invalid asset format {ext}" - format = ext + ext = filename.split(".")[-1].lower() elif isinstance(file, (str, PurePath)): file = PurePath(file) filename = file.name - ext = filename.split(".")[-1] - assert ext in ASSET_CONTENT_TYPES, f"Invalid asset format {ext}" - format = ext + ext = file.suffix[1:].lower() else: - raise ValueError("Must specify filename or format when saving binary asset handle") - - with bopen(file) as f: - url = f"/projects/{self.project_uid}/jobs/{self.uid}/files" - query = {"format": format} - if filename: - query["filename"] = filename - - with make_request(self.cs.vis, url=url, query=query, data=f) as res: - assert res.status >= 200 and res.status < 300, ( - f"Could not upload project {self.project_uid} asset {file}.\n" - f"Response from CryoSPARC: {res.read().decode()}" - ) - return json.loads(res.read()) + raise ValueError("Must specify filename or format when saving binary asset") + if ext not in ASSET_CONTENT_TYPES: + raise ValueError(f"Invalid asset format {ext}") + return self.cs.api.assets.upload(self.project_uid, self.uid, Stream.load(file), filename=filename, format=ext) def upload_plot( self, @@ -779,7 +709,7 @@ def upload_plot( raw_data_file: Union[str, PurePath, IO[bytes], None] = None, raw_data_format: Optional[TextFormat] = None, savefig_kw: dict = dict(bbox_inches="tight", pad_inches=0), - ) -> List[EventLogAsset]: + ) -> List[GridFSAsset]: """ Upload the given figure. Returns a list of the created asset objects. Avoid using directly; use ``log_plot`` instead. 
See ``log_plot`` @@ -815,7 +745,8 @@ def upload_plot( basename = name or "figure" if hasattr(figure, "savefig"): # matplotlib plot for fmt in formats: - assert fmt in IMAGE_CONTENT_TYPES, f"Invalid figure format {fmt}" + if fmt not in IMAGE_CONTENT_TYPES: + raise ValueError(f"Invalid figure format {fmt}") filename = f"{basename}.{fmt}" data = BytesIO() figure.savefig(data, format=fmt, **savefig_kw) # type: ignore @@ -824,13 +755,15 @@ def upload_plot( elif isinstance(figure, (str, PurePath)): # file path; assume format from filename path = PurePath(figure) basename = path.stem - fmt = str(figure).split(".")[-1] - assert fmt in IMAGE_CONTENT_TYPES, f"Invalid figure format {fmt}" + fmt = path.suffix[1:].lower() + if fmt not in IMAGE_CONTENT_TYPES: + raise ValueError(f"Invalid figure format {fmt}") filename = f"{name or path.stem}.{fmt}" figdata.append((figure, filename, fmt)) else: # Binary IO fmt = first(iter(formats)) - assert fmt in IMAGE_CONTENT_TYPES, f"Invalid or unspecified figure format {fmt}" + if fmt not in IMAGE_CONTENT_TYPES: + raise ValueError(f"Invalid or unspecified figure format {fmt}") filename = f"{basename}.{fmt}" figdata.append((figure, filename, fmt)) @@ -845,17 +778,13 @@ def upload_plot( raw_data_path = PurePath(raw_data_file) raw_data_filename = raw_data_path.name ext = raw_data_format or raw_data_filename.split(".")[-1] - assert ext in TEXT_CONTENT_TYPES, f"Invalid raw data filename {raw_data_file}" + if ext not in TEXT_CONTENT_TYPES: + raise ValueError(f"Invalid raw data filename {raw_data_file}") raw_data_format = ext - assets = [] - for data, filename, fmt in figdata: - asset = self.upload_asset(data, filename=filename, format=fmt) - assets.append(asset) - + assets = [self.upload_asset(data, filename, fmt) for data, filename, fmt in figdata] if raw_data_file: - raw_data_format = raw_data_format or "txt" - asset = self.upload_asset(raw_data_file, filename=raw_data_filename, format=raw_data_format) + asset = self.upload_asset(raw_data_file, 
raw_data_filename, raw_data_format or "txt") assets.append(asset) return assets @@ -1011,13 +940,13 @@ def subprocess( args = args if isinstance(args, str) else list(map(str, args)) with subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True, **kwargs) as proc: - assert proc.stdout, f"Subprocess {args} has not standard output" + assert proc.stdout, f"Subprocess {args} has no standard output" if checkpoint: self.log_checkpoint() - self.log("======= Forwarding subprocess output for the following command =======") + self.log("─────── Forwarding subprocess output for the following command ───────") self.log(str(args)) - self.log("======================================================================") + self.log("──────────────────────────────────────────────────────────────────────") for line in proc.stdout: line = line.rstrip() @@ -1035,7 +964,7 @@ def subprocess( self.log(msg, level="error") raise RuntimeError(msg) - self.log("======================= Subprocess complete. =========================") + self.log("─────────────────────── Subprocess complete. 
─────────────────────────") def print_param_spec(self): """ @@ -1059,10 +988,11 @@ def print_param_spec(self): """ headings = ["Param", "Title", "Type", "Default"] rows = [] - for key, details in self.doc["params_base"].items(): - if details["hidden"]: + for key, details in self.full_spec.params.items(): + if details.get("hidden") is True: continue - rows.append([key, details["title"], details["type"], repr(details["value"])]) + type = (details["anyOf"][0] if "anyOf" in details else details).get("type", "Any") + rows.append([key, details["title"], type, repr(details.get("default", None))]) print_table(headings, rows) def print_input_spec(self): @@ -1090,21 +1020,22 @@ def print_input_spec(self): | | | | alignments2D | alignments2D | ✕ | | | | alignments3D | alignments3D | ✕ """ + specs = self.cs.api.jobs.get_input_specs(self.project_uid, self.uid) headings = ["Input", "Title", "Type", "Required?", "Input Slots", "Slot Types", "Slot Required?"] rows = [] - for group in self.doc["input_slot_groups"]: - name, title, type = group["name"], group["title"], group["type"] - required = f"✓ ({group['count_min']}" if group["count_min"] else "✕ (0" - if group["count_max"] in {None, 0, 0.0, math.inf, n.inf}: + for key, spec in specs.root.items(): + name, title, type = key, spec.title, spec.type + required = f"✓ ({spec.count_min}" if spec.count_min else "✕ (0" + if spec.count_max in (0, "inf"): required += "+)" # unlimited connections - elif group["count_min"] == group["count_max"]: + elif spec.count_min == spec.count_max: required += ")" else: - required += f"-{group['count_max']})" - for slot in group["slots"]: - slot_required = "✕" if slot["optional"] else "✓" - rows.append([name, title, type, required, slot["name"], slot["type"].split(".").pop(), slot_required]) - name, title, type, required = ("",) * 4 + required += f"-{spec.count_max})" + for slot in spec.slots: + slot = as_input_slot(slot) + rows.append([name, title, type, required, slot.name, slot.dtype, "✓" if 
slot.required else "✕"]) + name, title, type, required = ("",) * 4 # only show group info on first iter print_table(headings, rows) def print_output_spec(self): @@ -1119,28 +1050,33 @@ def print_output_spec(self): >>> job.doc['type'] 'extract_micrographs_multi' >>> job.print_output_spec() - Output | Title | Type | Result Slots | Result Types - ========================================================================================== - micrographs | Micrographs | exposure | micrograph_blob | micrograph_blob - | | | micrograph_blob_non_dw | micrograph_blob - | | | background_blob | stat_blob - | | | ctf | ctf - | | | ctf_stats | ctf_stats - | | | mscope_params | mscope_params - particles | Particles | particle | blob | blob - | | | ctf | ctf - """ - headings = ["Output", "Title", "Type", "Result Slots", "Result Types"] + Output | Title | Type | Result Slots | Result Types | Passthrough? + ========================================================================================================= + micrographs | Micrographs | exposure | micrograph_blob | micrograph_blob | ✕ + | | | micrograph_blob_non_dw | micrograph_blob | ✓ + | | | background_blob | stat_blob | ✓ + | | | ctf | ctf | ✓ + | | | ctf_stats | ctf_stats | ✓ + | | | mscope_params | mscope_params | ✓ + particles | Particles | particle | blob | blob | ✕ + | | | ctf | ctf | ✕ + """ + specs = self.cs.api.jobs.get_output_specs(self.project_uid, self.uid) + headings = ["Output", "Title", "Type", "Result Slots", "Result Types", "Passthrough?"] rows = [] - for group in self.doc["output_result_groups"]: - name, title, type = group["name"], group["title"], group["type"] - for result in group["contains"]: - rows.append([name, title, type, result["name"], result["type"].split(".").pop()]) + for key, spec in specs.root.items(): + output = self.model.spec.outputs.root.get(key) + if not output: + warnings.warn(f"No results for input {key}", stacklevel=2) + continue + name, title, type = key, spec.title, spec.type + for 
result in output.results: + rows.append([name, title, type, result.name, result.dtype, "✓" if result.passthrough else "✕"]) name, title, type = "", "", "" # only these print once per group print_table(headings, rows) -class ExternalJob(Job): +class ExternalJobController(JobController): """ Mutable custom output job with customizeble input slots and output results. Use External jobs to save data save cryo-EM data generated by a software @@ -1150,14 +1086,10 @@ class ExternalJob(Job): an input. Its outputs must be created manually and may be configured to passthrough inherited input fields, just as with regular CryoSPARC jobs. - Create a new External Job with `Project.create_external_job`_. ExternalJob - is a subclass of `Job`_ and inherits all its methods. - - Attributes: - uid (str): Job unique ID, e.g., "J42" - project_uid (str): Project unique ID, e.g., "P3" - doc (JobDocument): All job data from the CryoSPARC database. Database - contents may change over time, use the `refresh`_ method to update. + Create a new External Job with :py:meth:`project.create_external_job() `. + or :py:meth:`workspace.create_external_job() `. + ``ExternalJobController`` is a subclass of :py:class:`JobController` + and inherits all its methods and attributes. Examples: @@ -1176,25 +1108,22 @@ class ExternalJob(Job): ... ) ... dset['movie_blob/path'] = ... # populate dataset ... job.save_output(output_name, dset) - - .. _Job: - #cryosparc.job.Job - - .. _refresh: - #cryosparc.job.Job.refresh - - .. 
def __init__(self, cs: "CryoSPARC", job: Union[Tuple[str, str], Job]) -> None:
    """
    Initialize the controller through the parent class, then verify that the
    resulting job model is an external job (spec type ``"snowflake"``).

    Raises:
        TypeError: If the job's spec type is not ``"snowflake"``.
    """
    super().__init__(cs, job)
    model = self.model
    if model.spec.type == "snowflake":
        return
    raise TypeError(f"Job {model.project_uid}-{model.uid} is not an external job")
Raises: CommandError: General CryoSPARC network access error such as @@ -1246,51 +1179,38 @@ def add_input( f'Invalid input name "{name}"; may only contain letters, numbers and underscores, ' "and must start with a letter" ) - try: - self.cs.vis.add_external_job_input( # type: ignore - project_uid=self.project_uid, - job_uid=self.uid, + if any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. Use 'name' instead.", DeprecationWarning, stacklevel=2) + if not name: + name = type + if not title: + title = name + self.model = self.cs.api.jobs.add_external_input( + self.project_uid, + self.uid, + name, + InputSpec( type=type, - name=name, - min=min, - max=max, - slots=slots, title=title, - ) - except CommandError as err: - if err.code == 422 and err.data and "slots" in err.data: - raise InvalidSlotsError("add_input", err.data["slots"]) from err - raise - self.refresh() - return self.doc["input_slot_groups"][-1]["name"] + description=desc or "", + slots=[as_input_slot(slot) for slot in slots], + count_min=min, + count_max=max, + ), + ) + return name + # fmt: off @overload - def add_output( - self, - type: Datatype, - name: Optional[str] = ..., - slots: List[SlotSpec] = ..., - passthrough: Optional[str] = ..., - title: Optional[str] = ..., - *, - alloc: Literal[None] = None, - ) -> str: ... + def add_output(self, type: Datatype, name: Optional[str] = ..., slots: Sequence[SlotSpec] = ..., passthrough: Optional[str] = ..., title: Optional[str] = ...) -> str: ... @overload - def add_output( - self, - type: Datatype, - name: Optional[str] = ..., - slots: List[SlotSpec] = ..., - passthrough: Optional[str] = ..., - title: Optional[str] = ..., - *, - alloc: Union[int, Dataset] = ..., - ) -> Dataset: ... + def add_output(self, type: Datatype, name: Optional[str] = ..., slots: Sequence[SlotSpec] = ..., passthrough: Optional[str] = ..., title: Optional[str] = ..., *, alloc: Union[int, Dataset]) -> Dataset: ... 
+ # fmt: on def add_output( self, type: Datatype, name: Optional[str] = None, - slots: List[SlotSpec] = [], + slots: Sequence[SlotSpec] = [], passthrough: Optional[str] = None, title: Optional[str] = None, *, @@ -1360,9 +1280,9 @@ def add_output( ... type="particle", ... name="particle_alignments", ... slots=[ - ... {"dtype": "alignments3D", "prefix": "alignments_class_0", "required": True}, - ... {"dtype": "alignments3D", "prefix": "alignments_class_1", "required": True}, - ... {"dtype": "alignments3D", "prefix": "alignments_class_2", "required": True}, + ... {"name": "alignments_class_0", "dtype": "alignments3D", "required": True}, + ... {"name": "alignments_class_1", "dtype": "alignments3D", "required": True}, + ... {"name": "alignments_class_2", "dtype": "alignments3D", "required": True}, ... ] ... ) "particle_alignments" @@ -1372,23 +1292,19 @@ def add_output( f'Invalid output name "{name}"; may only contain letters, numbers and underscores, ' "and must start with a letter" ) - try: - self.cs.vis.add_external_job_output( # type: ignore - project_uid=self.project_uid, - job_uid=self.uid, - type=type, - name=name, - slots=slots, - passthrough=passthrough, - title=title, - ) - except CommandError as err: - if err.code == 422 and err.data and "slots" in err.data: - raise InvalidSlotsError("add_output", err.data["slots"]) from err - raise - self.refresh() - result_name = self.doc["output_result_groups"][-1]["name"] - return result_name if alloc is None else self.alloc_output(result_name, alloc) + if any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. 
Use 'name' instead.", DeprecationWarning, stacklevel=2) + if not name: + name = type + if not title: + title = name + self.model = self.cs.api.jobs.add_external_output( + self.project_uid, + self.uid, + name, + OutputSpec(type=type, title=title, slots=[as_output_slot(slot) for slot in slots], passthrough=passthrough), + ) + return name if alloc is None else self.alloc_output(name, alloc) def connect( self, @@ -1396,15 +1312,15 @@ def connect( source_job_uid: str, source_output: str, *, - slots: List[SlotSpec] = [], - title: str = "", - desc: str = "", - refresh: bool = True, + slots: Sequence[SlotSpec] = [], + title: Optional[str] = None, + desc: Optional[str] = None, + **kwargs, ) -> bool: """ Connect the given input for this job to an output with given job UID and name. If this input does not exist, it will be added with the given - slots. At least one slot must be specified if the input does not exist. + slots. Args: target_input (str): Input name to connect into. Will be created if @@ -1412,14 +1328,14 @@ def connect( source_job_uid (str): Job UID to connect from, e.g., "J42" source_output (str): Job output name to connect from , e.g., "particles" - slots (list[SlotSpec], optional): List of slots to add to - created input. All if not specified. Defaults to []. + slots (list[SlotSpec], optional): List of input slots (e.g., + "particle" or "blob") to explicitly require for the created + input. If the given source job is missing these slots, the + external job cannot start or accept outputs. Defaults to []. title (str, optional): Human readable title for created input. - Defaults to "". + Defaults to target input name. desc (str, optional): Human readable description for created input. Defaults to "". - refresh (bool, optional): Auto-refresh job document after - connecting. Defaults to True.
Raises: CommandError: General CryoSPARC network access error such as @@ -1437,27 +1353,25 @@ def connect( >>> job.connect("input_micrographs", "J2", "micrographs") """ - assert source_job_uid != self.uid, f"Cannot connect job {self.uid} to itself" - try: - self.cs.vis.connect_external_job( # type: ignore - project_uid=self.project_uid, - source_job_uid=source_job_uid, - source_output=source_output, - target_job_uid=self.uid, - target_input=target_input, - slots=slots, - title=title, - desc=desc, - ) - except CommandError as err: - if err.code == 422 and err.data and "slots" in err.data: - raise InvalidSlotsError("connect", err.data["slots"]) from err - raise - if refresh: - self.refresh() - return True - - def alloc_output(self, name: str, alloc: Union[int, "ArrayLike", Dataset] = 0) -> Dataset: + if "refresh" in kwargs: + warnings.warn("refresh argument no longer applies", DeprecationWarning, stacklevel=2) + if source_job_uid == self.uid: + raise ValueError(f"Cannot connect job {self.uid} to itself") + source_job = self.cs.api.jobs.find_one(self.project_uid, source_job_uid) + if source_output not in source_job.spec.outputs.root: + raise ValueError(f"Source job {source_job_uid} does not have output {source_output}") + output = source_job.spec.outputs.root[source_output] + if target_input not in self.model.spec.inputs.root: + if any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. Use 'name' instead.", DeprecationWarning, stacklevel=2) + # convert to prevent from warning again + slots = [as_input_slot(slot) for slot in slots] # type: ignore + self.add_input(output.type, target_input, min=1, slots=slots, title=title, desc=desc) + return super().connect(target_input, source_job_uid, source_output) + + def alloc_output( + self, name: str, alloc: Union[int, "ArrayLike", Dataset] = 0, *, dtype_params: Dict[str, Any] = {} + ) -> Dataset: """ Allocate an empty dataset for the given output with the given name. 
Initialize with the given number of empty rows. The result may be @@ -1470,6 +1384,9 @@ def alloc_output(self, name: str, alloc: Union[int, "ArrayLike", Dataset] = 0) - (B) a numpy array of numbers to use for UIDs in the allocated dataset or (C) a dataset from which to inherit unique row IDs (useful for allocating passthrough outputs). Defaults to 0. + dtype_params (dict, optional): Data type parameters when allocating + results with dynamic column sizes such as ``particle`` -> + ``alignments3D_multi``. Defaults to {}. Returns: Dataset: Empty dataset with the given number of rows @@ -1498,17 +1415,7 @@ def alloc_output(self, name: str, alloc: Union[int, "ArrayLike", Dataset] = 0) - ]) """ - expected_fields = [] - for result in self.doc["output_results"]: - if result["group_name"] != name or result["passthrough"]: - continue - prefix = result["name"] - for field, dtype in result["min_fields"]: - expected_fields.append((f"{prefix}/{field}", dtype)) - - if not expected_fields: - raise ValueError(f"No such output {name} on {self.project_uid}-{self.uid}") - + expected_fields = self.cs.api.jobs.get_output_fields(self.project_uid, self.uid, name, dtype_params) if isinstance(alloc, int): return Dataset.allocate(alloc, expected_fields) elif isinstance(alloc, Dataset): @@ -1516,15 +1423,16 @@ def alloc_output(self, name: str, alloc: Union[int, "ArrayLike", Dataset] = 0) - else: return Dataset({"uid": alloc}).add_fields(expected_fields) - def save_output(self, name: str, dataset: Dataset, *, refresh: bool = True): + def save_output(self, name: str, dataset: Dataset, *, version: int = 0, **kwargs): """ Save output dataset to external job. Args: name (str): Name of output on this job. dataset (Dataset): Value of output with only required fields. - refresh (bool, Optional): Auto-refresh job document after saving. - Defaults to True + version (int, optional): Version number, when saving multiple + intermediate iterations. Only the last saved version is kept. + Defaults to 0. 
Examples: @@ -1537,13 +1445,9 @@ def save_output(self, name: str, dataset: Dataset, *, refresh: bool = True): >>> job.save_output("picked_particles", particles) """ - - url = f"/external/projects/{self.project_uid}/jobs/{self.uid}/outputs/{urllib.parse.quote_plus(name)}/dataset" - with make_request(self.cs.vis, url=url, data=dataset.stream(compression="lz4")) as res: - result = res.read().decode() - assert res.status >= 200 and res.status < 400, f"Save output failed with message: {result}" - if refresh: - self.refresh() + if "refresh" in kwargs: + warnings.warn("refresh argument no longer applies", DeprecationWarning, stacklevel=2) + self.model = self.cs.api.jobs.save_output(self.project_uid, self.uid, name, dataset, version=version) def start(self, status: Literal["running", "waiting"] = "waiting"): """ @@ -1552,24 +1456,24 @@ def start(self, status: Literal["running", "waiting"] = "waiting"): Args: status (str, optional): "running" or "waiting". Defaults to "waiting". """ - assert status in {"running", "waiting"}, f"Invalid start status {status}" - assert self.doc["status"] not in { - "running", - "waiting", - }, f"Job {self.project_uid}-{self.uid} is already in running status" - self.cs.cli.run_external_job(self.project_uid, self.uid, status) # type: ignore - self.refresh() + self.model = self.cs.api.jobs.mark_running(self.project_uid, self.uid, status=status) - def stop(self, error=False): + def stop(self, error: str = ""): """ - Set job status to "completed" or "failed" + Set job status to "completed" or "failed" if there was an error. Args: - error (bool, optional): Job completed with errors. Defaults to False. - """ - status = "failed" if error else "completed" - self.cs.cli.set_job_status(self.project_uid, self.uid, status) # type: ignore - self.refresh() + error (str, optional): Error message, will add to event log and set + job status to failed if specified. Defaults to "".
+ """ + if isinstance(error, bool): # allowed bool in previous version + warnings.warn("error should be specified as a string", DeprecationWarning, stacklevel=2) + error = "An error occurred" if error else "" + self.model = self.cs.api.jobs.kill(self.project_uid, self.uid) + if error: + self.model = self.cs.api.jobs.mark_failed(self.project_uid, self.uid, error=error) + else: + self.model = self.cs.api.jobs.mark_completed(self.project_uid, self.uid) @contextmanager def run(self): @@ -1589,21 +1493,21 @@ def run(self): ... job.save_output(...) """ - error = False - self.start("running") + error = "" try: + self.start("running") yield self except Exception: - error = True + error = traceback.format_exc() raise finally: - self.stop(error) # TODO: Write Error to job log, if possible + self.stop(error=error) def queue( self, lane: Optional[str] = None, hostname: Optional[str] = None, - gpus: List[int] = [], + gpus: Sequence[int] = [], cluster_vars: Dict[str, Any] = {}, ): raise ExternalJobError( diff --git a/cryosparc/project.py b/cryosparc/controllers/project.py similarity index 83% rename from cryosparc/project.py rename to cryosparc/controllers/project.py index 755a899c..cb565f84 100644 --- a/cryosparc/project.py +++ b/cryosparc/controllers/project.py @@ -1,49 +1,57 @@ +import warnings from pathlib import PurePath, PurePosixPath from typing import IO, TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -from .dataset import DEFAULT_FORMAT, Dataset -from .job import ExternalJob, Job -from .row import R -from .spec import Datatype, MongoController, ProjectDocument, SlotSpec -from .workspace import Workspace +from ..dataset import DEFAULT_FORMAT, Dataset +from ..dataset.row import R +from ..models.project import Project +from ..spec import Datatype, SlotSpec +from . 
import Controller, as_output_slot +from .job import ExternalJobController, JobController +from .workspace import WorkspaceController if TYPE_CHECKING: - from numpy.typing import NDArray # type: ignore + from numpy.typing import NDArray - from .tools import CryoSPARC + from ..tools import CryoSPARC -class Project(MongoController[ProjectDocument]): +class ProjectController(Controller[Project]): """ Accessor instance for CryoSPARC projects with ability to add workspaces, jobs - and upload/download project files. Should be instantiated through - `CryoSPARC.find_project`_. + and upload/download project files. Should be created with + :py:meth:`cs.find_project() `. - Attributes: - uid (str): Project unique ID, e.g., "P3" - doc (ProjectDocument): All project data from the CryoSPARC database. - Database contents may change over time, use the `refresh`_ method - to update. + Arguments: + project (str | Project): either Project UID or Project model, e.g. ``"P3"`` - .. _CryoSPARC.find_project: - tools.html#cryosparc.tools.CryoSPARC.find_project + Attributes: + model (Project): All project data from the CryoSPARC database. Contents + may change over time, use :py:meth:`refresh` to update. + """ - .. _refresh: - #cryosparc.project.Project.refresh + uid: str + """ + Project unique ID, e.g., "P3" """ - def __init__(self, cs: "CryoSPARC", uid: str) -> None: + def __init__(self, cs: "CryoSPARC", project: Union[str, Project]) -> None: self.cs = cs - self.uid = uid + if isinstance(project, str): + self.uid = project + self.refresh() + else: + self.uid = project.uid + self.model = project def refresh(self): """ Reload this project from the CryoSPARC database. 
Returns: - Project: self + ProjectController: self """ - self._doc = self.cs.cli.get_project(self.uid) # type: ignore + self.model = self.cs.api.projects.find_one(self.uid) return self def dir(self) -> PurePosixPath: @@ -53,10 +61,10 @@ def dir(self) -> PurePosixPath: Returns: Path: project directory Pure Path instance """ - path: str = self.cs.cli.get_project_dir_abs(self.uid) # type: ignore + path: str = self.cs.api.projects.get_directory(self.uid) return PurePosixPath(path) - def find_workspace(self, workspace_uid) -> Workspace: + def find_workspace(self, workspace_uid) -> WorkspaceController: """ Get a workspace accessor instance for the workspace in this project with the given UID. Fails with an error if workspace does not exist. @@ -65,12 +73,11 @@ def find_workspace(self, workspace_uid) -> Workspace: workspace_uid (str): Workspace unique ID, e.g., "W1" Returns: - Workspace: accessor instance + WorkspaceController: workspace accessor object """ - workspace = Workspace(self.cs, self.uid, workspace_uid) - return workspace.refresh() + return WorkspaceController(self.cs, (self.uid, workspace_uid)) - def find_job(self, job_uid: str) -> Job: + def find_job(self, job_uid: str) -> JobController: """ Get a job accessor instance for the job in this project with the given UID. Fails with an error if job does not exist. @@ -79,13 +86,11 @@ def find_job(self, job_uid: str) -> Job: job_uid (str): Job unique ID, e.g., "J42" Returns: - Job: accessor instance + JobController: job accessor instance """ - job = Job(self.cs, self.uid, job_uid) - job.refresh() - return job + return JobController(self.cs, (self.uid, job_uid)) - def find_external_job(self, job_uid: str) -> ExternalJob: + def find_external_job(self, job_uid: str) -> ExternalJobController: """ Get the External job accessor instance for an External job in this project with the given UID. 
Fails if the job does not exist or is not an @@ -98,11 +103,11 @@ def find_external_job(self, job_uid: str) -> ExternalJob: TypeError: If job is not an external job Returns: - ExternalJob: accessor instance + ExternalJobController: external job accessor object """ return self.cs.find_external_job(self.uid, job_uid) - def create_workspace(self, title: str, desc: Optional[str] = None) -> Workspace: + def create_workspace(self, title: str, desc: Optional[str] = None) -> WorkspaceController: """ Create a new empty workspace in this project. At least a title must be provided. @@ -112,7 +117,10 @@ def create_workspace(self, title: str, desc: Optional[str] = None) -> Workspace: desc (str, optional): Markdown text description. Defaults to None. Returns: - Workspace: created workspace instance + WorkspaceController: created workspace accessor object + + Raises: + APIError: Workspace cannot be created. """ return self.cs.create_workspace(self.uid, title, desc) @@ -122,12 +130,13 @@ def create_job( type: str, connections: Dict[str, Union[Tuple[str, str], List[Tuple[str, str]]]] = {}, params: Dict[str, Any] = {}, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> Job: + title: str = "", + desc: str = "", + ) -> JobController: """ - Create a new job with the given type. Use `CryoSPARC.get_job_sections`_ - to query available job types on the connected CryoSPARC instance. + Create a new job with the given type. Use + :py:attr:`cs.job_register ` + to find available job types on the connected CryoSPARC instance. Args: project_uid (str): Project UID to create job in, e.g., "P3" @@ -138,11 +147,14 @@ def create_job( value is a (job uid, output name) tuple. Defaults to {} params (dict[str, any], optional): Specify parameter values. Defaults to {}. - title (str, optional): Job title. Defaults to None. - desc (str, optional): Job markdown description. Defaults to None. + title (str, optional): Job title. Defaults to "". + desc (str, optional): Job markdown description. 
Defaults to "". Returns: - Job: created job instance. Raises error if job cannot be created. + JobController: created job accessor object. + + Raises: + APIError: Job cannot be created. Examples: @@ -161,9 +173,6 @@ def create_job( ... connections={"particles": ("J20", "particles_selected")} ... params={"abinit_K": 3} ... ) - - .. _CryoSPARC.get_job_sections: - tools.html#cryosparc.tools.CryoSPARC.get_job_sections """ return self.cs.create_job( self.uid, workspace_uid, type, connections=connections, params=params, title=title, desc=desc @@ -172,26 +181,23 @@ def create_job( def create_external_job( self, workspace_uid: str, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> ExternalJob: + title: str = "", + desc: str = "", + ) -> ExternalJobController: """ Add a new External job to this project to save generated outputs to. Args: workspace_uid (str): Workspace UID to create job in, e.g., "W3". title (str, optional): Title for external job (recommended). - Defaults to None. + Defaults to "". desc (str, optional): Markdown description for external job. - Defaults to None. + Defaults to "". Returns: ExternalJob: created external job instance """ - job_uid: str = self.cs.vis.create_external_job( # type: ignore - project_uid=self.uid, workspace_uid=workspace_uid, user=self.cs.user_id, title=title, desc=desc - ) - return self.find_external_job(job_uid) + return self.cs.create_external_job(self.uid, workspace_uid=workspace_uid, title=title, desc=desc) def save_external_result( self, @@ -201,8 +207,8 @@ def save_external_result( name: Optional[str] = None, slots: Optional[List[SlotSpec]] = None, passthrough: Optional[Tuple[str, str]] = None, - title: Optional[str] = None, - desc: Optional[str] = None, + title: str = "", + desc: str = "", ) -> str: """ Save the given result dataset to the project. Specify at least the @@ -268,13 +274,16 @@ def save_external_result( inherits slots from the specified output. e.g., ``("J1", "particles")``. Defaults to None. 
title (str, optional): Human-readable title for this output. - Defaults to None. + Defaults to "". desc (str, optional): Markdown description for this output. Defaults - to None. + to "". Returns: str: UID of created job where this output was saved """ + if slots and any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. Use 'name' instead.", DeprecationWarning, stacklevel=2) + slots = [as_output_slot(slot) for slot in slots] # type: ignore return self.cs.save_external_result( self.uid, workspace_uid, diff --git a/cryosparc/workspace.py b/cryosparc/controllers/workspace.py similarity index 68% rename from cryosparc/workspace.py rename to cryosparc/controllers/workspace.py index f6a36c67..6f276b8a 100644 --- a/cryosparc/workspace.py +++ b/cryosparc/controllers/workspace.py @@ -1,50 +1,60 @@ +import warnings from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Union -from .dataset import Dataset -from .job import ExternalJob, Job -from .row import R -from .spec import Datatype, MongoController, SlotSpec, WorkspaceDocument +from ..dataset import Dataset +from ..dataset.row import R +from ..models.workspace import Workspace +from ..spec import Datatype, SlotSpec +from . import Controller, as_output_slot +from .job import ExternalJobController, JobController if TYPE_CHECKING: - from .tools import CryoSPARC + from ..tools import CryoSPARC -class Workspace(MongoController[WorkspaceDocument]): +class WorkspaceController(Controller[Workspace]): """ Accessor class to a workspace in CryoSPARC with ability create jobs and save - results. Should be instantiated through `CryoSPARC.find_workspace`_ or - `Project.find_workspace`_. + results. Should be created with + :py:meth:`cs.find_workspace() ` or + :py:meth:`project.find_workspace() `.
- Attributes: - uid (str): Workspace unique ID, e.g., "W42" - project_uid (str): Project unique ID, e.g., "P3" - doc (WorkspaceDocument): All workspace data from the CryoSPARC database. - Database contents may change over time, use the `refresh`_ method - to update. - - .. _CryoSPARC.find_workspace: - tools.html#cryosparc.tools.CryoSPARC.find_workspace + Arguments: + workspace (tuple[str, str] | Workspace): either _(Project UID, Workspace UID)_ + tuple or Workspace model, e.g. ``("P3", "W4")`` - .. _Project.find_workspace: - project.html#cryosparc.project.Project.find_workspace + Attributes: + model (Workspace): All workspace data from the CryoSPARC database. + Contents may change over time, use :py:meth:`refresh` to update. + """ - .. _refresh: - #cryosparc.workspace.Workspace.refresh + uid: str + """ + Workspace unique ID, e.g., "W42" + """ + project_uid: str + """ + Project unique ID, e.g., "P3" """ - def __init__(self, cs: "CryoSPARC", project_uid: str, uid: str) -> None: + def __init__(self, cs: "CryoSPARC", workspace: Union[Tuple[str, str], Workspace]) -> None: self.cs = cs - self.project_uid = project_uid - self.uid = uid + if isinstance(workspace, tuple): + self.project_uid, self.uid = workspace + self.refresh() + else: + self.project_uid = workspace.project_uid + self.uid = workspace.uid + self.model = workspace def refresh(self): """ Reload this workspace from the CryoSPARC database. Returns: - Workspace: self + WorkspaceController: self """ - self._doc = self.cs.cli.get_workspace(self.project_uid, self.uid) # type: ignore + self.model = self.cs.api.workspaces.find_one(self.project_uid, self.uid) return self def create_job( @@ -52,13 +62,13 @@ def create_job( type: str, connections: Dict[str, Union[Tuple[str, str], List[Tuple[str, str]]]] = {}, params: Dict[str, Any] = {}, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> Job: + title: str = "", + desc: str = "", + ) -> JobController: """ - Create a new job with the given type.
Use the - `CryoSPARC.get_job_sections`_ method to query available job types on - the connected CryoSPARC instance. + Create a new job with the given type. Use + :py:attr:`cs.job_register ` + to find available job types on the connected CryoSPARC instance. Args: project_uid (str): Project UID to create job in, e.g., "P3" @@ -69,11 +79,11 @@ def create_job( value is a (job uid, output name) tuple. Defaults to {} params (dict[str, any], optional): Specify parameter values. Defaults to {}. - title (str, optional): Job title. Defaults to None. - desc (str, optional): Job markdown description. Defaults to None. + title (str, optional): Job title. Defaults to "". + desc (str, optional): Job markdown description. Defaults to "". Returns: - Job: created job instance. Raises error if job cannot be created. + JobController: created job instance. Raises error if job cannot be created. Examples: @@ -92,9 +102,6 @@ def create_job( ... connections={"particles": ("J20", "particles_selected")} ... params={"abinit_K": 3} ... ) - - .. _CryoSPARC.get_job_sections: - tools.html#cryosparc.tools.CryoSPARC.get_job_sections """ return self.cs.create_job( self.project_uid, self.uid, type, connections=connections, params=params, title=title, desc=desc @@ -102,21 +109,21 @@ def create_job( def create_external_job( self, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> ExternalJob: + title: str = "", + desc: str = "", + ) -> ExternalJobController: """ Add a new External job to this workspace to save generated outputs to. Args: workspace_uid (str): Workspace UID to create job in, e.g., "W1" title (str, optional): Title for external job (recommended). - Defaults to None. + Defaults to "". desc (str, optional): Markdown description for external job. - Defaults to None. + Defaults to "". 
Returns: - ExternalJob: created external job instance + ExternalJobController: created external job instance """ return self.cs.create_external_job(self.project_uid, self.uid, title, desc) @@ -127,8 +134,8 @@ def save_external_result( name: Optional[str] = None, slots: Optional[List[SlotSpec]] = None, passthrough: Optional[Tuple[str, str]] = None, - title: Optional[str] = None, - desc: Optional[str] = None, + title: str = "", + desc: str = "", ) -> str: """ Save the given result dataset to a workspace. @@ -146,9 +153,9 @@ def save_external_result( inherits slots from the specified output. e.g., ``("J1", "particles")``. Defaults to None. title (str, optional): Human-readable title for this output. - Defaults to None. + Defaults to "". desc (str, optional): Markdown description for this output. Defaults - to None. + to "". Returns: str: UID of created job where this output was saved. @@ -189,6 +196,10 @@ def save_external_result( ... ) "J45" """ + if slots and any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. 
Use 'name' instead.", DeprecationWarning, stacklevel=2) + # convert to prevent from warning again + slots = [as_output_slot(slot) for slot in slots] # type: ignore return self.cs.save_external_result( self.project_uid, self.uid, diff --git a/cryosparc/dataset.py b/cryosparc/dataset/__init__.py similarity index 96% rename from cryosparc/dataset.py rename to cryosparc/dataset/__init__.py index e027f9cc..f6fd2927 100644 --- a/cryosparc/dataset.py +++ b/cryosparc/dataset/__init__.py @@ -33,14 +33,16 @@ Callable, Collection, Dict, - Generator, Generic, + Iterator, List, + Literal, Mapping, MutableMapping, Optional, Sequence, Set, + SupportsIndex, Tuple, Type, Union, @@ -48,8 +50,10 @@ ) import numpy as n -from typing_extensions import Literal, SupportsIndex +from ..errors import DatasetLoadError +from ..stream import AsyncReadable, Streamable +from ..util import bopen, default_rng, random_integers, u32bytesle, u32intle from .column import Column from .core import Data, DsetType, Stream from .dtype import ( @@ -66,16 +70,11 @@ get_data_field_dtype, normalize_field, ) -from .errors import DatasetLoadError from .row import R, Row, Spool -from .stream import AsyncBinaryIO, Streamable -from .util import bopen, default_rng, random_integers, u32bytesle, u32intle if TYPE_CHECKING: from numpy.typing import ArrayLike, DTypeLike, NDArray - from .core import MemoryView - # Save format options NUMPY_FORMAT = 1 @@ -109,7 +108,9 @@ class Dataset(Streamable, MutableMapping[str, Column], Generic[R]): """ Accessor class for working with CryoSPARC .cs files. - A dataset may be initialized with ``Dataset(data)`` where ``data`` is + Load a dataset from disk with ``Dataset.load("/path/to/dataset.cs")``. 
+ + Initialize a new dataset with ``Dataset(data)`` where ``data`` is one of the following: * A size of items to allocate (e.g., 42) @@ -147,6 +148,7 @@ class Dataset(Streamable, MutableMapping[str, Column], Generic[R]): __slots__ = ("_row_class", "_rows", "_data") + media_type = "application/x-cryosparc-dataset" _row_class: Type[R] _rows: Optional[Spool[R]] _data: Data @@ -570,6 +572,7 @@ def load( prefixes: Optional[Sequence[str]] = None, fields: Optional[Sequence[str]] = None, cstrs: bool = False, + media_type: Optional[str] = None, # for interface, otherwise unused ): """ Read a dataset from path or file handle. @@ -682,16 +685,17 @@ def _load_stream( descr = filter_descr(header["dtype"], keep_prefixes=prefixes, keep_fields=fields) field_names = {field[0] for field in descr} - # Calling addrows separately to minimizes column-based - # allocations, improves performance by ~20% + # Calling addrows separately to minimize column-based allocations, + # improves performance by ~20% dset = cls.allocate(0, descr) - if header["length"] == 0: - return dset # no more data to load - data = dset._data data.addrows(header["length"]) + + # If a dataset is empty, it won't have anything in its data section. + # Just the string heap at the end. 
+ dtype = [] if header["length"] == 0 else header["dtype"] loader = Stream(data) - for field in header["dtype"]: + for field in dtype: colsize = u32intle(f.read(4)) if field[0] not in field_names: # try to seek instead of read to reduce memory usage @@ -700,8 +704,10 @@ def _load_stream( buffer = f.read(colsize) if field[0] in header["compressed_fields"]: loader.decompress_col(field[0], buffer) - else: - data.getbuf(field[0])[:] = buffer + continue + mem = data.getbuf(field[0]) + assert mem is not None, f"Could not load stream (missing {field[0]} buffer)" + mem[:] = buffer # Read in the string heap (rest of stream) # NOTE: There will be a bug here for long column keys that are @@ -717,7 +723,13 @@ def _load_stream( return dset @classmethod - async def from_async_stream(cls, stream: AsyncBinaryIO): + async def from_async_stream(cls, stream: AsyncReadable, *, media_type: Optional[str] = None): + prefix = await stream.read(6) + if prefix != FORMAT_MAGIC_PREFIXES[CSDAT_FORMAT]: + raise DatasetLoadError( + f"Incorrect async dataset stream format {prefix}. " + "Only CSDAT-formatted datasets may be loaded as async streams" + ) headersize = u32intle(await stream.read(4)) header = decode_dataset_header(await stream.read(headersize)) @@ -725,16 +737,22 @@ async def from_async_stream(cls, stream: AsyncBinaryIO): dset = cls.allocate(0, header["dtype"]) data = dset._data data.addrows(header["length"]) + + # If a dataset is empty, it won't have anything in its data section. + # Just the string heap at the end. 
+ dtype = [] if header["length"] == 0 else header["dtype"] loader = Stream(data) - for field in header["dtype"]: + for field in dtype: colsize = u32intle(await stream.read(4)) buffer = await stream.read(colsize) if field[0] in header["compressed_fields"]: loader.decompress_col(field[0], buffer) - else: - data.getbuf(field[0])[:] = buffer + continue + mem = data.getbuf(field[0]) + assert mem is not None, f"Could not load stream (missing {field[0]} buffer)" + mem[:] = buffer - heap = stream.read() + heap = await stream.read() data.setstrheap(heap) # Convert C strings to Python strings @@ -742,7 +760,7 @@ async def from_async_stream(cls, stream: AsyncBinaryIO): dset.to_pystrs() return dset - def save(self, file: Union[str, PurePath, IO[bytes]], format: int = DEFAULT_FORMAT): + def save(self, file: Union[str, PurePath, IO[bytes]], *, format: int = DEFAULT_FORMAT): """ Save a dataset to the given path or I/O buffer. @@ -770,7 +788,7 @@ def save(self, file: Union[str, PurePath, IO[bytes]], format: int = DEFAULT_FORM else: raise TypeError(f"Invalid dataset save format for {file}: {format}") - def stream(self, compression: Literal["lz4", None] = None) -> Generator[bytes, None, None]: + def stream(self, compression: Literal["lz4", None] = None) -> Iterator[bytes]: """ Generate a binary representation for this dataset. Results may be written to a file or buffer to be sent over the network. 
@@ -802,16 +820,14 @@ def stream(self, compression: Literal["lz4", None] = None) -> Generator[bytes, N yield u32bytesle(len(header)) yield header - if len(self) == 0: - return # empty dataset, don't yield anything - - for f in self: - fielddata: "MemoryView" + fields = [] if len(self) == 0 else self.fields() + for f in fields: if f in compressed_fields: # obj columns added to strheap and loaded as indexes fielddata = stream.compress_col(f) else: fielddata = stream.stralloc_col(f) or data.getbuf(f) + assert fielddata is not None, f"Could not stream dataset (missing {f} buffer)" yield u32bytesle(len(fielddata)) yield bytes(fielddata.memview) @@ -1230,7 +1246,7 @@ def filter_prefix(self, keep_prefix: str, *, rename: Optional[str] = None, copy: if rename and rename != keep_prefix: new_fields = [f"{rename}/{f.split('/', 1)[1]}" for f in keep_fields] - result = type(self)([("uid", self["uid"])] + [(nf, self[f]) for f, nf in zip(keep_fields, new_fields)]) + result = type(self)([("uid", self["uid"])] + [(nf, self[f]) for f, nf in zip(keep_fields, new_fields)]) # type: ignore return result if copy else self._reset(result._data) def drop_fields(self, names: Union[Collection[str], Callable[[str], bool]], *, copy: bool = False): diff --git a/cryosparc/column.py b/cryosparc/dataset/column.py similarity index 95% rename from cryosparc/column.py rename to cryosparc/dataset/column.py index 3e84ee69..789fe917 100644 --- a/cryosparc/column.py +++ b/cryosparc/dataset/column.py @@ -2,9 +2,9 @@ import numpy as n +from ..util import hashcache, strencodenull from .core import Data from .dtype import Field, fielddtype -from .util import hashcache, strencodenull class Column(n.ndarray): @@ -52,7 +52,9 @@ def __new__(cls, field: Field, data: Data): dtype = n.dtype(fielddtype(field)) nrow = data.nrow() shape = (nrow, *dtype.shape) - buffer = data.getbuf(field[0]).memview if nrow else None + buffer = data.getbuf(field[0]) + if buffer is not None: + buffer = buffer.memview obj = 
super().__new__(cls, shape=shape, dtype=dtype.base, buffer=buffer) # type: ignore # Keep a reference to the data so that it only gets cleaned up when all diff --git a/cryosparc/dataset/core.pyi b/cryosparc/dataset/core.pyi new file mode 100644 index 00000000..e8f19aca --- /dev/null +++ b/cryosparc/dataset/core.pyi @@ -0,0 +1,81 @@ +from enum import Enum +from typing import SupportsBytes + +from numpy.typing import NDArray + +__all__ = ["DsetType", "Stream", "Data"] + +class MemoryView(SupportsBytes): # Note: Supports buffer protocol. + base: "Array" + size: int + itemsize: int + nbytes: int + ndim: int + shape: tuple[int, ...] + strides: tuple[int, ...] + suboffsets: tuple[int, ...] + T: "MemoryView" + + def copy(self) -> "MemoryView": ... + def copy_fortran(self) -> "MemoryView": ... + def is_c_contig(self) -> bool: ... + def is_f_contig(self) -> bool: ... + +class Array: + memview: MemoryView + + def __len__(self) -> int: ... + def __getitem__(self, key: int | slice) -> bytes: ... + def __setitem__(self, key: int | slice, item: bytes): ... + +class DsetType(int, Enum): + T_F32 = ... + T_F64 = ... + T_C32 = ... + T_C64 = ... + T_I8 = ... + T_I16 = ... + T_I32 = ... + T_I64 = ... + T_U8 = ... + T_U16 = ... + T_U32 = ... + T_U64 = ... + T_STR = ... + T_OBJ = ... + +class Data: + def __init__(self, other: "Data" | None = None) -> None: ... + def innerjoin(self, key: str, other: "Data") -> "Data": ... + def totalsz(self) -> int: ... + def ncol(self) -> int: ... + def nrow(self) -> int: ... + def key(self, index: int) -> str: ... + def type(self, field: str) -> int: ... + def has(self, field: str) -> bool: ... + def addrows(self, num: int) -> None: ... + def addcol_scalar(self, field: str, dtype: int) -> None: ... + def addcol_array(self, field: str, dtype: int, shape: tuple[int, ...]) -> None: ... + def getshp(self, colkey: str) -> tuple[int, ...]: ... + def getbuf(self, colkey: str) -> Array | None: ... + def getstr(self, col: str, index: int) -> bytes: ... 
+ def tocstrs(self, col: str) -> bool: ... + def topystrs(self, col: str) -> bool: ... + def stralloc(self, val: str) -> int: ... + def dump(self) -> Array: ... + def dumpstrheap(self) -> Array: ... + def setstrheap(self, heap: bytes) -> None: ... + def defrag(self, realloc_smaller: bool) -> None: ... + def dumptxt(self, dump_data: bool = False) -> None: ... + def handle(self) -> int: ... + +class Stream: + def __init__(self, data: Data) -> None: ... + def cast_objs_to_strs(self) -> None: ... + def stralloc_col(self, col: str) -> Array | None: ... + def compress_col(self, col: str) -> Array: ... + def compress_numpy(self, arr: NDArray) -> Array: ... + def compress(self, arr: Array) -> Array: ... + def decompress_col(self, col: str, data: bytes) -> Array: ... + def decompress_numpy(self, data: bytes, arr: NDArray) -> Array: ... + def decompress(self, data: bytes, outptr: int = 0) -> Array: ... diff --git a/cryosparc/core.pyx b/cryosparc/dataset/core.pyx similarity index 99% rename from cryosparc/core.pyx rename to cryosparc/dataset/core.pyx index 384ebb9d..6327cbe0 100644 --- a/cryosparc/core.pyx +++ b/cryosparc/dataset/core.pyx @@ -142,7 +142,7 @@ cdef class Data: with nogil: mem = dataset.dset_get(self._handle, colkey_c) size = dataset.dset_getsz(self._handle, colkey_c) - return 0 if size == 0 else mem + return None if size == 0 else mem def getstr(self, str col, size_t index): return dataset.dset_getstr(self._handle, col.encode(), index) # returns bytes diff --git a/cryosparc/dataset.c b/cryosparc/dataset/dataset.c similarity index 100% rename from cryosparc/dataset.c rename to cryosparc/dataset/dataset.c diff --git a/cryosparc/dataset.pxd b/cryosparc/dataset/dataset.pxd similarity index 100% rename from cryosparc/dataset.pxd rename to cryosparc/dataset/dataset.pxd diff --git a/cryosparc/dtype.py b/cryosparc/dataset/dtype.py similarity index 85% rename from cryosparc/dtype.py rename to cryosparc/dataset/dtype.py index 9359bd55..3d8811ea 100644 --- 
a/cryosparc/dtype.py +++ b/cryosparc/dataset/dtype.py @@ -3,42 +3,17 @@ """ import json -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type, Union +from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Sequence, Type, TypedDict, Union import numpy as n -from typing_extensions import Literal, Sequence, TypedDict +from ..errors import DatasetLoadError +from ..spec import DType, Field from .core import Data, DsetType if TYPE_CHECKING: from numpy.typing import DTypeLike, NDArray -Shape = Tuple[int, ...] -"""A numpy shape tuple from ndarray.shape""" - -DType = Union[str, Tuple[str, Shape]] -""" - - Can just be a single string such as "f4", "3u4" or "O". - A datatype description of a ndarray entry. - - Can also be the a tuple with a string datatype name and its shape. For - example, the following dtypes are equivalent. - - - "3u4" - - " DatasetHeader: compressed_fields=compressed_fields, ) except Exception as e: - raise ValueError(f"Incorrect dataset field format: {data.decode() if isinstance(data, bytes) else data}") from e + raise DatasetLoadError( + f"Incorrect dataset field format: {data.decode() if isinstance(data, bytes) else data}" + ) from e diff --git a/cryosparc/lz4.pxd b/cryosparc/dataset/lz4.pxd similarity index 100% rename from cryosparc/lz4.pxd rename to cryosparc/dataset/lz4.pxd diff --git a/cryosparc/row.py b/cryosparc/dataset/row.py similarity index 99% rename from cryosparc/row.py rename to cryosparc/dataset/row.py index adb80848..173d791a 100644 --- a/cryosparc/row.py +++ b/cryosparc/dataset/row.py @@ -2,8 +2,8 @@ import numpy as n +from ..util import default_rng, random_integers from .column import Column -from .util import default_rng, random_integers class Row(Mapping): diff --git a/cryosparc/errors.py b/cryosparc/errors.py index 15847544..e024bf8d 100644 --- a/cryosparc/errors.py +++ b/cryosparc/errors.py @@ -2,11 +2,11 @@ Definitions for various error classes raised by cryosparc-tools functions """ -from typing import 
Any, List +import json +from typing import TYPE_CHECKING, Any -from typing_extensions import TypedDict - -from .spec import Datafield, Datatype, SlotSpec +if TYPE_CHECKING: + from httpx import Response class DatasetLoadError(Exception): @@ -15,69 +15,35 @@ class DatasetLoadError(Exception): pass -class CommandError(Exception): +class APIError(ValueError): """ - Raised by failed request to a CryoSPARC command server. + Raised by failed request to a CryoSPARC API server. """ code: int - data: Any - - def __init__(self, reason: str, *args: object, url: str = "", code: int = 500, data: Any = None) -> None: - msg = f"*** ({url}, code {code}) {reason}" + data: Any = None + + def __init__( + self, + reason: str, + *args: object, + res: "Response", + data: Any = None, # must be JSON-encodable if provided + ) -> None: + msg = f"*** [API] ({res.request.method} {res.url}, code {res.status_code}) {reason}" super().__init__(msg, *args) - self.code = code - self.data = data - - -class SlotsValidation(TypedDict): - """ - Data from validation error when specifying external result input/output slots. - - :meta private: - """ - - type: Datatype - valid: List[SlotSpec] - invalid: List[Datafield] - valid_dtypes: List[str] - - -class InvalidSlotsError(ValueError): - """ - Raised by functions that accept slots arguments when CryoSPARC reports that - given slots are not valid. - """ - - def __init__(self, caller: str, validation: SlotsValidation): - type = validation["type"] - valid_slots = validation["valid"] - invalid_slots = validation["invalid"] - valid_dtypes = validation["valid_dtypes"] - msg = "\n".join( - [ - f"Unknown {type} slot dtype(s): {', '.join(s['dtype'] for s in invalid_slots)}. " - "Only the following slot dtypes are valid:", - "", - ] - + [f" - {t}" for t in valid_dtypes] - + [ - "", - "If adding a dynamic result such as alignments_class_#, specify a " - "slots=... argument with a full data field specification:", - "", - f" {caller}(... 
, slots=[", " ...", ] + [f" {repr(s)}," for s in valid_slots] + [ " {'dtype': '', 'prefix': '%s', 'required': True}," % s["dtype"] for s in invalid_slots ] + [" ...", " ])"] - ) - - return super().__init__(msg) + self.code = res.status_code + if data is not None: + self.data = data + elif res.headers.get("Content-Type") == "application/json": + self.data = res.json() + + def __str__(self): + s = super().__str__() + if self.data: + s += "\nResponse data:\n" + s += json.dumps(self.data, indent=4) + return s class ExternalJobError(Exception): diff --git a/cryosparc/json_util.py b/cryosparc/json_util.py new file mode 100644 index 00000000..4100e647 --- /dev/null +++ b/cryosparc/json_util.py @@ -0,0 +1,99 @@ +import base64 +from datetime import datetime +from enum import Enum +from pathlib import PurePath +from typing import Any, Mapping + +import numpy as n +from pydantic import BaseModel + + +def api_encode(obj: Any): + """ + Recursively encode any object for transmission through the API. + """ + if isinstance(obj, dict): + return {k: api_encode(v) for k, v in obj.items()} + elif isinstance(obj, list): + return [api_encode(v) for v in obj] + else: + return api_default(obj) + + +def api_default(obj: Any) -> Any: + """ + json.dump "default" argument for sending objects over a JSON API. Ensures + that special non-JSON types such as Path and NDArray are encoded correctly.
+ """ + if isinstance(obj, n.floating): + if n.isnan(obj): + return float(0) + elif n.isposinf(obj): + return float("inf") + elif n.isneginf(obj): + return float("-inf") + return float(obj) + elif isinstance(obj, n.integer): + return int(obj) + elif isinstance(obj, n.ndarray): + return ndarray_to_json(obj) + elif isinstance(obj, bytes): + return binary_to_json(obj) + elif isinstance(obj, datetime): + return obj.isoformat() + elif isinstance(obj, Enum): + return obj.value + elif isinstance(obj, PurePath): + return str(obj) + elif isinstance(obj, BaseModel): + return obj.model_dump(exclude_none=True) + else: + return obj + + +def api_object_hook(dct: Mapping[str, Any]): + """ + json.load "object_hook" argument for receiving JSON from an API request. + Ensures that special objects that are actually encoded numpy arrays or bytes + are decoded as such. + """ + if "$ndarray" in dct: + return ndarray_from_json(dct) + elif "$binary" in dct: + return binary_from_json(dct) + else: + return dct # pydantic will take care of everything else + + +def binary_to_json(binary: bytes): + """ + Encode bytes as a JSON-serializeable object + """ + return {"$binary": {"base64": base64.b64encode(binary).decode()}} + + +def binary_from_json(dct: Mapping[str, Any]) -> bytes: + if "base64" not in dct["$binary"] or not isinstance(b64 := dct["$binary"]["base64"], str): + raise TypeError(f"$binary base64 must be a string: {dct}") + return base64.b64decode(b64.encode()) + + +def ndarray_to_json(arr: n.ndarray): + """ + Encode a numpy array as a JSON-serializeable object. + """ + return { + "$ndarray": { + "base64": base64.b64encode(arr.tobytes()).decode(), + "dtype": str(arr.dtype), + "shape": arr.shape, + } + } + + +def ndarray_from_json(dct: Mapping[str, Any]): + """ + Decode a serialized numpy array.
+ """ + data = base64.b64decode(dct["$ndarray"]["base64"]) + return n.frombuffer(data, dct["$ndarray"]["dtype"]).reshape(dct["$ndarray"]["shape"]) diff --git a/cryosparc/model_registry.py b/cryosparc/model_registry.py new file mode 100644 index 00000000..d9473859 --- /dev/null +++ b/cryosparc/model_registry.py @@ -0,0 +1,60 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from .models import ( + api_request, + api_response, + asset, + auth, + diagnostics, + event, + exposure, + external, + gpu, + instance, + job, + job_register, + job_spec, + license, + notification, + project, + scheduler_lane, + scheduler_target, + service, + session, + session_config_profile, + session_params, + session_spec, + signature, + tag, + user, + workspace, +) +from .registry import register_model_module + +register_model_module(session) +register_model_module(job) +register_model_module(scheduler_target) +register_model_module(gpu) +register_model_module(api_request) +register_model_module(api_response) +register_model_module(job_spec) +register_model_module(exposure) +register_model_module(event) +register_model_module(user) +register_model_module(session_params) +register_model_module(external) +register_model_module(project) +register_model_module(asset) +register_model_module(signature) +register_model_module(instance) +register_model_module(job_register) +register_model_module(license) +register_model_module(service) +register_model_module(notification) +register_model_module(diagnostics) +register_model_module(scheduler_lane) +register_model_module(workspace) +register_model_module(session_spec) +register_model_module(session_config_profile) +register_model_module(tag) +register_model_module(auth) diff --git a/cryosparc/models/__init__.py b/cryosparc/models/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/cryosparc/models/api_request.py b/cryosparc/models/api_request.py new file mode 100644 index 00000000..1fc0cf01 
--- /dev/null +++ b/cryosparc/models/api_request.py @@ -0,0 +1,13 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from pydantic import BaseModel + + +class AppSession(BaseModel): + user_id: str + session_id: str + signature: str + + +class SHA256Password(BaseModel): + password: str diff --git a/cryosparc/models/api_response.py b/cryosparc/models/api_response.py new file mode 100644 index 00000000..4445a86f --- /dev/null +++ b/cryosparc/models/api_response.py @@ -0,0 +1,70 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import List, Optional + +from pydantic import BaseModel + + +class BrowseFile(BaseModel): + file_name: str + base_path: str + is_hidden: bool + path_abs: str + is_link: bool = False + mtime: Optional[float] = None + size: Optional[int] = None + type: Optional[str] = None + link_path: Optional[str] = None + errmesg: Optional[str] = None + + +class BrowseFileResponse(BaseModel): + back_path: str + files: List[BrowseFile] + type: str + + +class DeleteJobPreview(BaseModel): + project_uid: str + uid: str + workspace_uids: List[str] + status: str + title: str + type: str + + +class DeleteProjectWorkspacePreview(BaseModel): + project_uid: str + uid: str + title: Optional[str] + + +class DeleteProjectPreview(BaseModel): + jobs: List[DeleteJobPreview] + workspaces: List[DeleteProjectWorkspacePreview] + + +class DeleteWorkspacePreview(BaseModel): + jobs: List[DeleteJobPreview] + + +class GetFinalResultsResponse(BaseModel): + final_results: List[str] + ancestors_of_final_results: List[str] + non_ancestors_of_final_results: List[str] + + +class Hello(BaseModel): + name: str = "CryoSPARC" + version: str + service: str + + +class WorkspaceAncestorUidsResponse(BaseModel): + ancestors: List[str] + non_ancestors: List[str] + + +class WorkspaceDescendantUidsResponse(BaseModel): + descendants: List[str] + non_descendants: List[str] diff --git a/cryosparc/models/asset.py 
b/cryosparc/models/asset.py new file mode 100644 index 00000000..1a5b41ba --- /dev/null +++ b/cryosparc/models/asset.py @@ -0,0 +1,34 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import Literal, Optional, Union + +from pydantic import BaseModel, Field + + +class GridFSAsset(BaseModel): + """ + Information about an uploaded GridFS file. + """ + + fileid: str + filename: str + filetype: str + + +class GridFSFile(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + filename: str + contentType: Union[ + Literal["text/plain", "text/csv", "text/html", "application/json", "application/xml", "application/x-troff"], + Literal["application/pdf", "image/gif", "image/jpeg", "image/png", "image/svg+xml"], + str, + ] + uploadDate: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + length: int + chunkSize: int + md5: Optional[str] = None + project_uid: str + job_uid: str diff --git a/cryosparc/models/auth.py b/cryosparc/models/auth.py new file mode 100644 index 00000000..aa91e66b --- /dev/null +++ b/cryosparc/models/auth.py @@ -0,0 +1,8 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from pydantic import BaseModel + + +class Token(BaseModel): + access_token: str + token_type: str diff --git a/cryosparc/models/diagnostics.py b/cryosparc/models/diagnostics.py new file mode 100644 index 00000000..28106242 --- /dev/null +++ b/cryosparc/models/diagnostics.py @@ -0,0 +1,32 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import List, Optional, Union + +from pydantic import BaseModel + +from .instance import InstanceInformation +from 
.scheduler_target import Cluster, Node + + +class SchedulerTargetInformation(BaseModel): + cache_path: Optional[str] = None + cache_reserve_mb: Optional[int] = None + cache_quota_mb: Optional[int] = None + lane: str + name: str + title: str + desc: Optional[str] = None + hostname: str + worker_bin_path: str + config: Union[Node, Cluster] + instance_information: Optional[InstanceInformation] = None + + +class RuntimeDiagnostics(BaseModel): + cryosparc_version: str + cryosparc_patch: str + instance_information: InstanceInformation + scheduler_targets: List[SchedulerTargetInformation] + db_stats: dict + date_generated: datetime.datetime diff --git a/cryosparc/models/event.py b/cryosparc/models/event.py new file mode 100644 index 00000000..bc4b5db2 --- /dev/null +++ b/cryosparc/models/event.py @@ -0,0 +1,90 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import List, Literal, Optional + +from pydantic import BaseModel, Field + +from .asset import GridFSAsset + + +class CheckpointEvent(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + project_uid: str + job_uid: str + cpumem_mb: Optional[float] = None + avail_mb: Optional[float] = None + flags: List[str] = [] + meta: dict = {} + type: str + + +class Event(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + project_uid: str + job_uid: str + cpumem_mb: Optional[float] = None + avail_mb: Optional[float] = None + flags: List[str] = [] + meta: dict = {} + + +class ImageEvent(BaseModel): + id: str = 
Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + project_uid: str + job_uid: str + cpumem_mb: Optional[float] = None + avail_mb: Optional[float] = None + flags: List[str] = [] + meta: dict = {} + type: str + text: str + imgfiles: List[GridFSAsset] = [] + + +class InteractiveImgfile(BaseModel): + imgfiles: List[GridFSAsset] + components: List[int] = [] + + +class InteractiveEvent(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + project_uid: str + job_uid: str + cpumem_mb: Optional[float] = None + avail_mb: Optional[float] = None + flags: List[str] = [] + meta: dict = {} + type: str + subtype: str = "3dscatter" + text: str + datafile: GridFSAsset + preview_imgfiles: List[InteractiveImgfile] = [] + components: List[int] = [] + + +class TextEvent(BaseModel): + """ + An event with only text and no additional image or interactive data. May + have "text", "warning" or "error" type. 
+ """ + + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + project_uid: str + job_uid: str + cpumem_mb: Optional[float] = None + avail_mb: Optional[float] = None + flags: List[str] = [] + meta: dict = {} + type: Literal["text", "warning", "error"] + text: str diff --git a/cryosparc/models/exposure.py b/cryosparc/models/exposure.py new file mode 100644 index 00000000..47bbf161 --- /dev/null +++ b/cryosparc/models/exposure.py @@ -0,0 +1,242 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import Any, List, Literal, Optional, Tuple, Union + +from pydantic import BaseModel, Field + +from .session_params import LivePreprocessingParams + + +class CTF(BaseModel): + accel_kv: float + amp_contrast: float + cross_corr_ctffind4: float + cs_mm: float + ctf_fit_to_A: float + df1_A: float + df2_A: float + df_angle_rad: float + exp_group_id: int + fig_of_merit_gctf: float + path: str + phase_shift_rad: float + type: str + + +class CtfStats(BaseModel): + cross_corr: int + ctf_fit_to_A: float + df_range: List[Any] + df_tilt_normal: List[Any] + diag_image_path: str + fit_data_path: str + ice_thickness_rel: float + spectrum_dim: int + type: str + + +class ECLExposureProperties(BaseModel): + do_athena_results_upload: bool = False + + +class StatBlob(BaseModel): + binfactor: int + idx: int + path: str + psize_A: float + shape: Tuple[int, int] + + +class GainRefBlob(BaseModel): + flip_x: int + flip_y: int + idx: int + path: str + rotate_num: int + shape: List[int] + + +class MicrographBlob(BaseModel): + format: str + idx: int + is_background_subtracted: bool + path: str + psize_A: float + shape: List[int] + + +class MovieBlob(BaseModel): + format: str + has_defect_file: bool = False + import_sig: int = 0 + 
is_gain_corrected: bool + path: str + psize_A: float + shape: List[int] + + +class MScopeParams(BaseModel): + accel_kv: float + beam_shift: List[int] = [0, 0] + beam_shift_known: int = 0 + cs_mm: float + defect_path: Optional[str] = None + exp_group_id: int + neg_stain: int = 0 + phase_plate: int + total_dose_e_per_A2: float = 0 + + +class MotionData(BaseModel): + frame_end: int + frame_start: int + idx: int + path: str + psize_A: float + type: str + zero_shift_frame: int + + +class ExposureElement(BaseModel): + background_blob: Optional[StatBlob] = None + ctf: Optional[CTF] = None + ctf_stats: Optional[CtfStats] = None + gain_ref_blob: Optional[GainRefBlob] = None + micrograph_blob: Optional[MicrographBlob] = None + micrograph_blob_non_dw: Optional[MicrographBlob] = None + micrograph_blob_thumb: Optional[MicrographBlob] = None + movie_blob: Optional[MovieBlob] = None + mscope_params: Optional[MScopeParams] = None + rigid_motion: Optional[MotionData] = None + spline_motion: Optional[MotionData] = None + uid: int = 0 + + +class PickerLocations(BaseModel): + center_x_frac: List[float] = [] + center_y_frac: List[float] = [] + + +class ParticleManual(BaseModel): + count: int = 0 + fields: List[str] = [] + path: str = "." + location: PickerLocations = PickerLocations() + + +class ParticleInfo(BaseModel): + count: int = 0 + fields: List[str] = [] + path: str = "." + + +class PickerInfoElement(BaseModel): + count: int = 0 + fields: List[str] = [] + path: str = "." 
+ output_shape: Optional[int] = None + picker_type: Optional[Literal["blob", "template", "manual"]] = None + + +class ExposureGroups(BaseModel): + """ + Metadata about outputs produced by a specific exposure + """ + + exposure: ExposureElement = ExposureElement() + particle_manual: ParticleManual = ParticleManual() + particle_blob: ParticleInfo = ParticleInfo() + particle_template: ParticleInfo = ParticleInfo() + particle_deep: dict = {} + particle_extracted: Union[List[PickerInfoElement], ParticleInfo] = ParticleInfo() + particle_manual_extracted: PickerInfoElement = PickerInfoElement() + + +class ExposureAttributes(BaseModel): + """ + Exposure processing metadata. The "round" param is used for display in the + UI (defaults to 0 if not specified). + """ + + found_at: float = 0 + check_at: float = 0 + motion_at: float = 0 + thumbs_at: float = 0 + ctf_at: float = 0 + pick_at: float = 0 + extract_at: float = 0 + manual_extract_at: float = 0 + ready_at: float = 0 + total_motion_dist: float = 0 + max_intra_frame_motion: float = 0 + average_defocus: float = 0 + defocus_range: float = 0 + astigmatism_angle: float = 0 + astigmatism: float = 0 + phase_shift: float = 0 + ctf_fit_to_A: float = 0 + ice_thickness_rel: float = 0 + df_tilt_angle: float = 0 + total_manual_picks: int = 0 + total_blob_picks: int = 0 + blob_pick_score_median: float = 0 + total_template_picks: int = 0 + template_pick_score_median: float = 0 + total_extracted_particles: int = 0 + total_extracted_particles_manual: int = 0 + total_extracted_particles_blob: int = 0 + total_extracted_particles_template: int = 0 + + +class Exposure(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + uid: int + project_uid: str + session_uid: str + exp_group_id: int + abs_file_path: str + size: int + 
discovered_at: datetime.datetime + picker_type: Optional[Literal["blob", "template", "manual"]] = None + deleted: bool = False + parameter_version: Optional[int] = None + stage: Literal[ + "go_to_found", + "found", + "check", + "motion", + "ctf", + "thumbs", + "pick", + "extract", + "extract_manual", + "ready", + "restoring", + "restoring_motion", + "restoring_thumbs", + "restoring_ctf", + "restoring_extract", + "restoring_extract_manual", + "compacted", + ] = "found" + fail_count: int = 0 + failed: bool = False + fail_reason: str = "" + in_progress: bool = False + manual_reject: bool = False + threshold_reject: bool = False + test: bool = False + worker_juid: Optional[str] = None + priority: int = 0 + groups: ExposureGroups = ExposureGroups() + attributes: ExposureAttributes = ExposureAttributes() + test_parameters: Optional[LivePreprocessingParams] = None + preview_img_1x: List[str] = [] + preview_img_2x: List[str] = [] + thumb_shape: List[int] = [] + micrograph_shape: List[int] = [] + micrograph_psize: Optional[float] = None + ecl: ECLExposureProperties = ECLExposureProperties() diff --git a/cryosparc/models/external.py b/cryosparc/models/external.py new file mode 100644 index 00000000..d81514f5 --- /dev/null +++ b/cryosparc/models/external.py @@ -0,0 +1,17 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import Optional + +from pydantic import BaseModel + +from .job_spec import OutputRef, OutputSpec + + +class ExternalOutputSpec(BaseModel): + """ + Specification for an external job with a single output. 
+ """ + + name: str + spec: OutputSpec + connection: Optional[OutputRef] = None diff --git a/cryosparc/models/gpu.py b/cryosparc/models/gpu.py new file mode 100644 index 00000000..4c2aaa8e --- /dev/null +++ b/cryosparc/models/gpu.py @@ -0,0 +1,33 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import Literal + +from pydantic import BaseModel + + +class Gpu(BaseModel): + """ + GPU details for a target. + """ + + id: int + name: str + mem: int + + +class GpuInfo(BaseModel): + id: int + name: str + mem: int + bus_id: str = "" + compute_mode: Literal["Default", "Exclusive Thread", "Prohibited", "Exclusive Process"] = "Default" + persistence_mode: Literal["Disabled", "Enabled"] = "Disabled" + power_limit: float = 0.0 + sw_power_limit: Literal["Not Active", "Active"] = "Not Active" + hw_power_limit: Literal["Not Active", "Active"] = "Not Active" + max_pcie_link_gen: int = 0 + current_pcie_link_gen: int = 0 + temperature: int = 0 + gpu_utilization: int = 0 + memory_utilization: int = 0 + driver_version: str = "" diff --git a/cryosparc/models/instance.py b/cryosparc/models/instance.py new file mode 100644 index 00000000..0c3db5e3 --- /dev/null +++ b/cryosparc/models/instance.py @@ -0,0 +1,28 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import List, Optional + +from pydantic import BaseModel + +from .gpu import GpuInfo + + +class InstanceInformation(BaseModel): + platform_node: str = "" + platform_release: str = "" + platform_version: str = "" + platform_architecture: str = "" + cpu_model: str = "" + physical_cores: int = 0 + max_cpu_freq: float = 0.0 + total_memory: str = "0B" + available_memory: str = "0B" + used_memory: str = "0B" + ofd_soft_limit: int = 0 + ofd_hard_limit: int = 0 + driver_version: Optional[str] = None + toolkit_version: Optional[str] = None + CUDA_version: Optional[str] = None + nvrtc_version: Optional[str] = None + gpu_info: 
Optional[List[GpuInfo]] = None + version: str = "" diff --git a/cryosparc/models/job.py b/cryosparc/models/job.py new file mode 100644 index 00000000..86113cb8 --- /dev/null +++ b/cryosparc/models/job.py @@ -0,0 +1,146 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import Dict, List, Literal, Optional, Tuple + +from pydantic import BaseModel, Field + +from .instance import InstanceInformation +from .job_spec import JobSpec +from .scheduler_target import FixedResourceSlots, ResourceSlots, SchedulerTarget + + +class AllocatedResources(BaseModel): + lane: Optional[str] = None + lane_type: Optional[str] = None + hostname: str + target: Optional[SchedulerTarget] = None + slots: ResourceSlots = ResourceSlots() + fixed: FixedResourceSlots = FixedResourceSlots() + licenses_acquired: int = 0 + + +JobStatus = Literal["building", "queued", "launched", "started", "running", "waiting", "completed", "killed", "failed"] + + +class JobLastAccessed(BaseModel): + name: str = "" + accessed_at: datetime.datetime = datetime.datetime(1, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + + +class RunError(BaseModel): + message: str + warning: bool = False + + +class UiTileImage(BaseModel): + name: str + fileid: str + num_rows: Optional[int] = None + num_cols: Optional[int] = None + + +class Job(BaseModel): + """ + Specification for a Job document from the MongoDB database. 
+ """ + + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + dumped_at: Optional[datetime.datetime] = None + last_dumped_version: Optional[str] = None + autodump: bool = True + uid: str + project_uid: str + workspace_uids: List[str] + spec: JobSpec + job_dir: str + job_dir_size: int = 0 + job_dir_size_last_updated: Optional[datetime.datetime] = None + run_as_user: Optional[str] = None + title: str = "" + description: str = "" + status: JobStatus = "building" + created_by_user_id: Optional[str] = None + created_by_job_uid: Optional[str] = None + cloned_from: Optional[str] = None + queued_at: Optional[datetime.datetime] = None + started_at: Optional[datetime.datetime] = None + launched_at: Optional[datetime.datetime] = None + running_at: Optional[datetime.datetime] = None + waiting_at: Optional[datetime.datetime] = None + completed_at: Optional[datetime.datetime] = None + killed_at: Optional[datetime.datetime] = None + failed_at: Optional[datetime.datetime] = None + heartbeat_at: Optional[datetime.datetime] = None + tokens_acquired_at: Optional[datetime.datetime] = None + tokens_requested_at: Optional[datetime.datetime] = None + last_scheduled_at: Optional[datetime.datetime] = None + last_accessed: Optional[JobLastAccessed] = None + has_error: bool = False + has_warning: bool = False + version_created: Optional[str] = None + version: Optional[str] = None + priority: int = 0 + deleted: bool = False + parents: List[str] = [] + children: List[str] = [] + resources_allocated: Optional[AllocatedResources] = None + queued_by_user_id: Optional[str] = None + queued_to_lane: Optional[str] = None + queued_to_hostname: Optional[str] = None + queued_to_gpu: Optional[List[int]] = None + queue_index: Optional[int] = None + queue_status: Optional[ + Literal[ + "waiting_inputs", 
+ "project_paused", + "actively_queued", + "launched", + "waiting_maintenance", + "waiting_licenses", + "waiting_resources", + ] + ] = None + queue_message: Optional[str] = None + queued_job_hash: Optional[int] = None + num_tokens: int = 0 + job_sig: Optional[str] = None + errors_run: List[RunError] = [] + interactive_port: Optional[int] = None + PID_monitor: Optional[int] = None + PID_main: Optional[int] = None + PID_workers: List[int] = [] + cluster_job_id: Optional[str] = None + cluster_job_status: Optional[str] = None + cluster_job_status_code: Optional[str] = None + cluster_job_monitor_event_id: Optional[str] = None + cluster_job_monitor_retries: int = 0 + cluster_job_monitor_last_run_at: Optional[datetime.datetime] = None + cluster_job_submission_script: Optional[str] = None + cluster_job_custom_vars: Dict[str, str] = {} + ui_tile_images: List[UiTileImage] = [] + is_experiment: bool = False + enable_bench: bool = False + bench: dict = {} + bench_timings: Dict[str, List[Tuple[datetime.datetime, Optional[datetime.datetime]]]] = {} + completed_count: int = 0 + instance_information: InstanceInformation = InstanceInformation() + generate_intermediate_results: bool = False + last_intermediate_data_cleared_at: Optional[datetime.datetime] = None + last_intermediate_data_cleared_amount: int = 0 + intermediate_results_size_bytes: int = 0 + intermediate_results_size_last_updated: Optional[datetime.datetime] = None + is_final_result: bool = False + is_ancestor_of_final_result: bool = False + no_check_inputs_ready: bool = False + ui_layouts: Optional[dict] = None + progress: List[dict] = [] + last_exported_at: Optional[datetime.datetime] = None + last_exported_location: Optional[str] = None + last_exported_version: Optional[str] = None + tags: List[str] = [] + imported_at: Optional[datetime.datetime] = None + deleted_at: Optional[datetime.datetime] = None + starred_by: List[str] = [] diff --git a/cryosparc/models/job_register.py b/cryosparc/models/job_register.py new file 
mode 100644 index 00000000..91a36931 --- /dev/null +++ b/cryosparc/models/job_register.py @@ -0,0 +1,33 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import List, Optional + +from pydantic import BaseModel, Field + +from .job_spec import BuilderTag, Category, InputSpecs, JobRegisterError, OutputSpecs, Stability + + +class JobRegisterJobSpec(BaseModel): + type: str + title: str + shorttitle: str + description: str + stability: Stability + category: Category + tags: List[BuilderTag] = [] + hidden: bool = False + interactive: bool = False + lightweight: bool = False + inputs: InputSpecs + outputs: OutputSpecs + params: dict + + +class JobRegister(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + specs: List[JobRegisterJobSpec] = [] + error: Optional[JobRegisterError] = None + username: Optional[str] = None diff --git a/cryosparc/models/job_spec.py b/cryosparc/models/job_spec.py new file mode 100644 index 00000000..d6ead994 --- /dev/null +++ b/cryosparc/models/job_spec.py @@ -0,0 +1,257 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Union + +from pydantic import BaseModel, ConfigDict, RootModel + +BuilderTag = Literal[ + "new", "interactive", "gpuEnabled", "multiGpu", "utility", "import", "live", "benchmark", "wrapper" +] +""" +Visual indicators for jobs in the builder. 
+""" + +Category = Literal[ + "import", + "motion_correction", + "ctf_estimation", + "exposure_curation", + "particle_picking", + "extraction", + "deep_picker", + "particle_curation", + "reconstruction", + "refinement", + "ctf_refinement", + "variability", + "flexibility", + "postprocessing", + "local_refinement", + "helix", + "utilities", + "simulations", + "live", + "instance_testing", + "workflows", +] +""" +Section under which to group a job in the builder. +""" + + +class InputResult(BaseModel): + """ + Concrete job input result connection to an output result. + """ + + name: Optional[str] = None + dtype: str + job_uid: str + output: str + result: str + version: Union[int, str] = "F" + + +class Connection(BaseModel): + """ + Job input connection details. + """ + + job_uid: str + output: str + type: Literal[ + "exposure", + "particle", + "template", + "volume", + "volume_multi", + "mask", + "live", + "ml_model", + "symmetry_candidate", + "flex_mesh", + "flex_model", + "hyperparameter", + "denoise_model", + "annotation_model", + ] + results: List[InputResult] = [] + + +class OutputSlot(BaseModel): + """ + Specification of an output slot in the job configuration. 
Part of a group + """ + + name: str + dtype: str + + +class OutputSpec(BaseModel): + """ + Used for outputs with some generated data based on data forwarded from + input inheritance + """ + + type: Literal[ + "exposure", + "particle", + "template", + "volume", + "volume_multi", + "mask", + "live", + "ml_model", + "symmetry_candidate", + "flex_mesh", + "flex_model", + "hyperparameter", + "denoise_model", + "annotation_model", + ] + title: str + description: str = "" + slots: List[Union[OutputSlot, str]] = [] + passthrough: Optional[str] = None + passthrough_exclude_slots: List[str] = [] + + +class OutputRef(BaseModel): + """ + Minimal name reference to a specific job output + """ + + job_uid: str + output: str + + +class InputSlot(BaseModel): + """ + Specification of an input slot in the job configuration. Part of a group. + """ + + name: str + dtype: str + required: bool = False + + +class InputSpec(BaseModel): + type: Literal[ + "exposure", + "particle", + "template", + "volume", + "volume_multi", + "mask", + "live", + "ml_model", + "symmetry_candidate", + "flex_mesh", + "flex_model", + "hyperparameter", + "denoise_model", + "annotation_model", + ] + title: str + description: str = "" + slots: List[Union[InputSlot, str]] = [] + count_min: int = 0 + count_max: Union[int, str] = "inf" + repeat_allowed: bool = False + + +class InputSpecs(RootModel): + root: Dict[str, InputSpec] = {} + + +class Inputs(RootModel): + root: Dict[str, List[Connection]] = {} + + +class Params(BaseModel): + """ + Job parameter specifications. See API function projects.get_job_register + for allowed parameters based on job spec type. + """ + + model_config = ConfigDict(extra="allow") + if TYPE_CHECKING: + + def __init__(self, **kwargs: Any) -> None: ... + def __getattr__(self, key: str) -> Any: ... + + +class OutputResult(BaseModel): + """ + Concrete job output. 
+ """ + + name: str + dtype: str + versions: List[int] = [] + metafiles: List[str] = [] + num_items: List[int] = [] + passthrough: bool = False + + +class Output(BaseModel): + type: Literal[ + "exposure", + "particle", + "template", + "volume", + "volume_multi", + "mask", + "live", + "ml_model", + "symmetry_candidate", + "flex_mesh", + "flex_model", + "hyperparameter", + "denoise_model", + "annotation_model", + ] + results: List[OutputResult] = [] + num_items: int = 0 + image: Optional[str] = None + summary: dict = {} + latest_summary_stats: dict = {} + + +class Outputs(RootModel): + root: Dict[str, Output] = {} + + +class JobSpec(BaseModel): + type: str + params: Params + inputs: Inputs = Inputs() + outputs: Outputs = Outputs() + + +Stability = Literal["develop", "beta", "stable", "legacy", "obsolete"] +""" +Lifecycle/development stage for a job. Jobs will change stabilities as they +are continually developed or replaced with other jobs. +""" + + +class OutputSpecs(RootModel): + root: Dict[str, OutputSpec] = {} + + +class JobRegisterError(BaseModel): + """ + Error that occurs when loading a developer job register. + """ + + type: str + message: str + traceback: str + + +class ResourceSpec(BaseModel): + cpu: int = 1 + gpu: int = 0 + ram: int = 1 + ssd: bool = False diff --git a/cryosparc/models/license.py b/cryosparc/models/license.py new file mode 100644 index 00000000..c99e3ae5 --- /dev/null +++ b/cryosparc/models/license.py @@ -0,0 +1,32 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import TypedDict + + +class LicenseInstance(TypedDict, total=False): + """ + Information about license allowance and usage for a specific instance. 
+ """ + + id: str + current_in_use: int + num_queued_jobs: int + max_licenses_available: int + default_max_licenses_available: int + queued_jobs: str + active_jobs: str + version: str + alias: str + group_id: str + reserved_licenses: int + min_reserved_licenses: int + license_developer: bool + license_live_enabled: bool + commercial_instance: bool + valid: bool + this_instance: bool + + +class UpdateTag(TypedDict): + show_message: bool + message: str diff --git a/cryosparc/models/notification.py b/cryosparc/models/notification.py new file mode 100644 index 00000000..b74d8ae7 --- /dev/null +++ b/cryosparc/models/notification.py @@ -0,0 +1,24 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import Literal, Optional + +from pydantic import BaseModel, Field + +from .job import JobStatus + + +class Notification(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + project_uid: Optional[str] = None + job_uid: Optional[str] = None + message: str + progress_pct: Optional[float] = None + active: bool = True + status: Optional[Literal["success", "primary", "warning", "danger"]] = "success" + icon: str = "flag" + hide: bool = False + job_status: Optional[JobStatus] = None + ttl_seconds: int = 7 diff --git a/cryosparc/models/project.py b/cryosparc/models/project.py new file mode 100644 index 00000000..3c956529 --- /dev/null +++ b/cryosparc/models/project.py @@ -0,0 +1,68 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import Dict, List, Literal, Optional + +from pydantic import BaseModel, Field + + +class GenerateIntermediateResultsSettings(BaseModel): + class_2D: bool = False + class_3D: bool = False + var_3D_disp: bool 
= False + + +class ProjectLastAccessed(BaseModel): + name: str = "" + accessed_at: datetime.datetime = datetime.datetime(1, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + + +class ProjectStats(BaseModel): + workspace_count: int = 0 + session_count: int = 0 + job_count: int = 0 + job_types: Dict[str, int] = {} + job_sections: Dict[str, int] = {} + job_status: Dict[str, int] = {} + updated_at: Optional[datetime.datetime] = None + + +class Project(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + dumped_at: Optional[datetime.datetime] = None + last_dumped_version: Optional[str] = None + autodump: bool = True + uid: str + project_dir: str + owner_user_id: str + title: str + description: str = "" + project_params_pdef: dict = {} + queue_paused: bool = False + deleted: bool = False + users_with_access: List[str] = [] + size: int = 0 + size_last_updated: datetime.datetime + last_accessed: Optional[ProjectLastAccessed] = None + archived: bool = False + detached: bool = False + hidden: bool = False + generate_intermediate_results_settings: GenerateIntermediateResultsSettings = GenerateIntermediateResultsSettings() + last_exp_group_id_used: Optional[int] = None + develop_run_as_user: Optional[str] = None + imported_at: Optional[datetime.datetime] = None + import_status: Optional[Literal["importing", "complete", "failed"]] = None + project_stats: ProjectStats = ProjectStats() + last_archived_version: Optional[str] = None + last_detached_version: Optional[str] = None + is_cleanup_in_progress: bool = False + tags: List[str] = [] + starred_by: List[str] = [] + + +class ProjectSymlink(BaseModel): + path: str + target: str + exists: bool diff --git a/cryosparc/models/scheduler_lane.py b/cryosparc/models/scheduler_lane.py new file mode 100644 index 
00000000..d5caca90 --- /dev/null +++ b/cryosparc/models/scheduler_lane.py @@ -0,0 +1,12 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import Literal, Optional + +from pydantic import BaseModel + + +class SchedulerLane(BaseModel): + name: str + type: Literal["node", "cluster"] + title: str + desc: Optional[str] = None diff --git a/cryosparc/models/scheduler_target.py b/cryosparc/models/scheduler_target.py new file mode 100644 index 00000000..2cfc3bc5 --- /dev/null +++ b/cryosparc/models/scheduler_target.py @@ -0,0 +1,98 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import Dict, List, Optional, Union + +from pydantic import BaseModel + +from .gpu import Gpu + + +class ResourceSlots(BaseModel): + """ + Listings of available resources on a worker node that may be allocated for + scheduling. + """ + + CPU: List[int] = [] + GPU: List[int] = [] + RAM: List[int] = [] + + +class FixedResourceSlots(BaseModel): + """ + Available resource slots that only indicate presence, not the amount that + may be allocated. (i.e., "SSD is available or not available") + """ + + SSD: bool = False + + +class Node(BaseModel): + """ + Node-type scheduler target that may include GPUs + """ + + type: str + ssh_str: str + resource_slots: ResourceSlots = ResourceSlots() + resource_fixed: FixedResourceSlots = FixedResourceSlots() + monitor_port: Optional[int] = None + gpus: Optional[List[Gpu]] = None + + +class Cluster(BaseModel): + """ + Cluster-type scheduler targets details + """ + + send_cmd_tpl: str = "{{ command }}" + qsub_cmd_tpl: str = "qsub {{ script_path_abs }}" + qstat_cmd_tpl: str = "qstat -as {{ cluster_job_id }}" + qdel_cmd_tpl: str = "qdel {{ cluster_job_id }}" + qinfo_cmd_tpl: str = "qstat -q" + type: str + script_tpl: str = "" + custom_vars: Dict[str, str] = {} + + +class SchedulerTarget(BaseModel): + """ + Details and configuration for a node or cluster target. 
+ """ + + cache_path: Optional[str] = None + cache_reserve_mb: Optional[int] = None + cache_quota_mb: Optional[int] = None + lane: str + name: str + title: str + desc: Optional[str] = None + hostname: str + worker_bin_path: str + config: Union[Node, Cluster] + + +class SchedulerTarget_Cluster_(BaseModel): + cache_path: Optional[str] = None + cache_reserve_mb: Optional[int] = None + cache_quota_mb: Optional[int] = None + lane: str + name: str + title: str + desc: Optional[str] = None + hostname: str + worker_bin_path: str + config: Cluster + + +class SchedulerTarget_Node_(BaseModel): + cache_path: Optional[str] = None + cache_reserve_mb: Optional[int] = None + cache_quota_mb: Optional[int] = None + lane: str + name: str + title: str + desc: Optional[str] = None + hostname: str + worker_bin_path: str + config: Node diff --git a/cryosparc/models/service.py b/cryosparc/models/service.py new file mode 100644 index 00000000..1b94e107 --- /dev/null +++ b/cryosparc/models/service.py @@ -0,0 +1,16 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import Literal + +LoggingService = Literal[ + "app", "database", "cache", "api", "scheduler", "command_vis", "app_api", "supervisord", "update" +] +""" +Same as Service, but also includes supervisord and update logs. +""" + +ServiceLogLevel = Literal["ERROR", "WARNING", "INFO", "DEBUG"] +""" +Possible values for event log messages used in CLI functions that write to +them. 
+""" diff --git a/cryosparc/models/session.py b/cryosparc/models/session.py new file mode 100644 index 00000000..c876c275 --- /dev/null +++ b/cryosparc/models/session.py @@ -0,0 +1,358 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import Any, Dict, List, Literal, Optional + +from pydantic import BaseModel, Field + +from .job import JobStatus, RunError +from .session_params import LiveAbinitParams, LiveClass2DParams, LivePreprocessingParams, LiveRefineParams +from .session_spec import SessionStatus +from .signature import ImportSignature +from .workspace import WorkspaceStats + + +class AbInitioVolumeInfo(BaseModel): + vol_gname: str + fileid: Optional[str] = None + selected: bool = False + + +class AthenaVolumeUploadParams(BaseModel): + type: str = "refinement" + name: str + path_rel: str + symmetry: str = "C1" + psize_A: float = 1.0 + res_A: Optional[float] = None + + +class DataManagementStat(BaseModel): + status: Literal["active", "archiving", "archived", "deleted", "deleting", "missing", "calculating"] = "active" + prev_status: Optional[ + Literal["active", "archiving", "archived", "deleted", "deleting", "missing", "calculating"] + ] = None + size: int = 0 + + +class DataManagementStats(BaseModel): + raw: DataManagementStat = DataManagementStat() + micrographs: DataManagementStat = DataManagementStat() + thumbnails: DataManagementStat = DataManagementStat() + particles: DataManagementStat = DataManagementStat() + metadata: DataManagementStat = DataManagementStat() + + +class ECLSessionProperties(BaseModel): + do_athena_volume_upload: bool = False + athena_volume_upload_params: Optional[AthenaVolumeUploadParams] = None + + +class ExposureGroup(BaseModel): + """ + Full exposure group definition, not all properties are externally editable + """ + + ignore_exposures: bool = False + gainref_path: Optional[str] = None + defect_path: Optional[str] = None + file_engine_recursive: bool = False + 
file_engine_watch_path_abs: str = "/" + file_engine_filter: str = "*" + file_engine_interval: int = 10 + file_engine_min_file_size: int = 0 + file_engine_min_modified_time_delta: int = 0 + exp_group_id: int = 1 + num_exposures_found: int = 0 + num_exposures_ready: int = 0 + file_engine_strategy: Literal["entity", "timestamp", "eclathena"] = "entity" + file_engine_enable: bool = False + final: bool = False + is_any_eer: bool = False + + +class ExposureGroupUpdate(BaseModel): + """ + Public editable properties for exposure group + """ + + ignore_exposures: bool = False + gainref_path: Optional[str] = None + defect_path: Optional[str] = None + file_engine_recursive: bool = False + file_engine_watch_path_abs: str = "/" + file_engine_filter: str = "*" + file_engine_interval: int = 10 + file_engine_min_file_size: int = 0 + file_engine_min_modified_time_delta: int = 0 + + +class LiveComputeResources(BaseModel): + phase_one_lane: Optional[str] = None + phase_one_gpus: int = 1 + phase_two_lane: Optional[str] = None + phase_two_gpus: int = 1 + phase_two_ssd: bool = True + auxiliary_lane: Optional[str] = None + auxiliary_gpus: int = 1 + auxiliary_ssd: bool = True + priority: int = 0 + + +class Phase2ParticleOutputInfo(BaseModel): + path: Optional[str] = None + count: int = 0 + fields: List[str] = [] + + +class Threshold(BaseModel): + min: Optional[float] = None + max: Optional[float] = None + value: Optional[float] = None + + +class RangeThreshold(BaseModel): + min: Optional[float] = None + max: Optional[float] = None + value: Optional[float] = None + min_value: Optional[float] = None + max_value: Optional[float] = None + + +class PickingThresholds(BaseModel): + manual_ncc_score: Threshold = Threshold() + manual_power: RangeThreshold = RangeThreshold() + blob_ncc_score: Threshold = Threshold() + blob_power: RangeThreshold = RangeThreshold() + template_ncc_score: Threshold = Threshold() + template_power: RangeThreshold = RangeThreshold() + deep_ncc_score: Threshold = 
Threshold() + deep_power: RangeThreshold = RangeThreshold() + + +class RTPChild(BaseModel): + uid: str + status: JobStatus + rtp_handle_func: Literal[ + "handle_template_creation_class2D", "phase2_class2D_handle", "phase2_abinit_handle", "phase2_refine_handle" + ] + + +class RtpWorkerState(BaseModel): + status: JobStatus + errors: List[RunError] = [] + + +class SessionLastAccessed(BaseModel): + name: str = "" + accessed_at: datetime.datetime = datetime.datetime(1, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + + +class SessionAttribute(BaseModel): + name: str + title: str + min: Optional[float] = None + max: Optional[float] = None + round: int = 0 + + +class TemplateClassInfo(BaseModel): + class_idx: int + fileid: str + res_A: float + selected: bool = False + num_particles_selected: int = 0 + num_particles_total: int = 0 + mean_prob: float = 0.0 + class_ess: float = 0.0 + + +class SessionStats(BaseModel): + total_exposures: int = 0 + total_queued: int = 0 + total_seen: int = 0 + total_in_progress: int = 0 + total_thumbs: int = 0 + total_test: int = 0 + total_accepted: int = 0 + total_rejected: int = 0 + total_failed: int = 0 + total_ready: int = 0 + average_manual_picks: int = 0 + total_blob_picks: int = 0 + total_template_picks: int = 0 + total_deep_picks: int = 0 + total_manual_picks: int = 0 + total_extracted_particles_blob: int = 0 + total_extracted_particles_template: int = 0 + total_extracted_particles_manual: int = 0 + total_extracted_particles_deep: int = 0 + total_extracted_particles: int = 0 + total_manual_picked_exposures: int = 0 + gsfsc: float = 0.0 + frames: int = 0 + nx: int = 0 + ny: int = 0 + manual_rejected: int = 0 + avg_movies_found_per_hour: int = 0 + avg_movies_ready_per_hour: int = 0 + avg_movies_accepted_per_hour: int = 0 + avg_particles_extracted_per_mic: int = 0 + avg_particles_extracted_per_hour: int = 0 + + +class Session(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = 
datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + dumped_at: Optional[datetime.datetime] = None + last_dumped_version: Optional[str] = None + autodump: bool = True + uid: str + project_uid: str + created_by_user_id: Optional[str] = None + title: Optional[str] = None + description: Optional[str] = None + created_by_job_uid: Optional[str] = None + tags: List[str] = [] + starred_by: List[str] = [] + deleted: bool = False + last_accessed: Optional[SessionLastAccessed] = None + workspace_stats: WorkspaceStats = WorkspaceStats() + notes: str = "" + notes_lock: Optional[str] = None + imported_at: Optional[datetime.datetime] = None + workspace_type: str = "live" + session_uid: str + session_dir: str + status: SessionStatus = "paused" + failed_at: List[datetime.datetime] = [] + running_at: List[datetime.datetime] = [] + paused_at: List[datetime.datetime] = [] + completed_at: Optional[datetime.datetime] = None + cleared_at: Optional[datetime.datetime] = None + elapsed_time: float = 0.0 + parameter_version: int = 0 + params: LivePreprocessingParams = LivePreprocessingParams() + attributes: List[SessionAttribute] = [ + SessionAttribute(name="found_at", title="Timestamp", min=None, max=None, round=0), + SessionAttribute(name="check_at", title="Check Stage Completed At", min=None, max=None, round=0), + SessionAttribute(name="motion_at", title="Motion Stage Completed At", min=None, max=None, round=0), + SessionAttribute(name="thumbs_at", title="Thumbs Stage Completed At", min=None, max=None, round=0), + SessionAttribute(name="ctf_at", title="CTF Stage Completed At", min=None, max=None, round=0), + SessionAttribute(name="pick_at", title="Pick Stage Completed At", min=None, max=None, round=0), + SessionAttribute(name="extract_at", title="Extract Stage Completed At", min=None, max=None, round=0), + SessionAttribute( + name="manual_extract_at", title="Manual Extract 
Stage Completed At", min=None, max=None, round=0 + ), + SessionAttribute(name="ready_at", title="Exposure Ready At", min=None, max=None, round=0), + SessionAttribute(name="total_motion_dist", title="Total Motion (pix)", min=None, max=None, round=2), + SessionAttribute(name="max_intra_frame_motion", title="Max In-Frame Motion", min=None, max=None, round=3), + SessionAttribute(name="average_defocus", title="Defocus Avg. (Å)", min=None, max=None, round=0), + SessionAttribute(name="defocus_range", title="Defocus Range (Å)", min=None, max=None, round=0), + SessionAttribute(name="astigmatism_angle", title="Astigmatism Angle (deg)", min=None, max=None, round=1), + SessionAttribute(name="astigmatism", title="Astigmatism", min=None, max=None, round=2), + SessionAttribute(name="phase_shift", title="Phase Shift (deg)", min=None, max=None, round=1), + SessionAttribute(name="ctf_fit_to_A", title="CTF Fit (Å)", min=None, max=None, round=3), + SessionAttribute(name="ice_thickness_rel", title="Relative Ice Thickness", min=None, max=None, round=3), + SessionAttribute(name="df_tilt_angle", title="Sample Tilt (deg)", min=None, max=None, round=1), + SessionAttribute(name="total_manual_picks", title="Total Manual Picks", min=None, max=None, round=0), + SessionAttribute(name="total_blob_picks", title="Total Blob Picks", min=None, max=None, round=0), + SessionAttribute(name="blob_pick_score_median", title="Median Blob Pick Score", min=None, max=None, round=3), + SessionAttribute(name="total_template_picks", title="Total Template Picks", min=None, max=None, round=0), + SessionAttribute( + name="template_pick_score_median", title="Median Template Pick Score", min=None, max=None, round=3 + ), + SessionAttribute( + name="total_extracted_particles_manual", + title="Total Manual Picker Particles Extracted", + min=None, + max=None, + round=0, + ), + SessionAttribute( + name="total_extracted_particles_blob", + title="Total Blob Picker Particles Extracted", + min=None, + max=None, + round=0, + ), + 
SessionAttribute( + name="total_extracted_particles_template", + title="Total Template Picker Particles Extracted", + min=None, + max=None, + round=0, + ), + SessionAttribute( + name="total_extracted_particles", title="Total Particles Extracted", min=None, max=None, round=0 + ), + ] + picking_thresholds: PickingThresholds = PickingThresholds() + compute_resources: LiveComputeResources = LiveComputeResources() + phase_one_workers: Dict[str, RtpWorkerState] = {} + phase_one_workers_soft_kill: List[Any] = [] + live_session_job_uid: Optional[str] = None + file_engine_status: Literal["inactive", "running"] = "inactive" + file_engine_last_run: Optional[datetime.datetime] = None + max_timestamps: List[Any] = [] + known_files: List[Any] = [] + rtp_childs: List[RTPChild] = [] + avg_usage: List[Any] = [] + template_creation_job: Optional[str] = None + template_creation_project: Optional[str] = None + template_creation_num_particles_in: int = 0 + template_creation_ready: bool = False + template_creation_info: List[TemplateClassInfo] = [] + exposure_groups: List[ExposureGroup] = [] + stats: SessionStats = SessionStats() + data_management: DataManagementStats = DataManagementStats() + import_signatures: ImportSignature = ImportSignature() + exposure_summary: dict = {} + particle_summary: dict = {} + exposure_processing_priority: Literal["normal", "oldest", "latest", "alternate"] = "normal" + cleared_extractions_at: Optional[datetime.datetime] = None + cleared_extractions_size: float = 0.0 + last_compacted_amount: int = 0 + last_compacted_at: Optional[datetime.datetime] = None + last_compacted_version: Optional[str] = None + last_restored_amount: int = 0 + last_restored_at: Optional[datetime.datetime] = None + compacted_exposures_count: int = 0 + restoration_notification_id: Optional[str] = None + restoration_user_id: Optional[str] = None + pre_restoration_size: int = 0 + phase2_class2D_restart: bool = False + phase2_class2D_params_spec: Optional[LiveClass2DParams] = None + 
phase2_class2D_params_spec_used: Optional[LiveClass2DParams] = None + phase2_class2D_job: Optional[str] = None + phase2_class2D_ready: bool = False + phase2_class2D_ready_partial: bool = False + phase2_class2D_info: List[TemplateClassInfo] = [] + phase2_class2D_num_particles_in: int = 0 + phase2_class2D_particles_out: Optional[Phase2ParticleOutputInfo] = None + phase2_class2D_num_particles_seen: int = 0 + phase2_class2D_num_particles_accepted: int = 0 + phase2_class2D_num_particles_rejected: int = 0 + phase2_class2D_last_updated: Optional[datetime.datetime] = None + phase2_select2D_job: Optional[str] = None + phase2_abinit_restart: bool = False + phase2_abinit_params_spec: LiveAbinitParams = LiveAbinitParams() + phase2_abinit_job: Optional[str] = None + phase2_abinit_ready: bool = False + phase2_abinit_info: List[AbInitioVolumeInfo] = [] + phase2_abinit_num_particles_in: int = 0 + phase2_refine_restart: bool = False + phase2_refine_params_spec: LiveRefineParams = LiveRefineParams() + phase2_refine_params_spec_used: Optional[LiveRefineParams] = None + phase2_refine_job: Optional[str] = None + phase2_refine_ready: bool = False + phase2_refine_ready_partial: bool = False + phase2_refine_num_particles_in: int = 0 + phase2_refine_last_updated: Optional[datetime.datetime] = None + athena_epu_run_id: Optional[str] = None + is_multigrid_epu_run: bool = False + is_gracefully_pausing: bool = False + computed_stats_last_run_time: Optional[datetime.datetime] = None + last_processed_exposure_priority: Literal["normal", "oldest", "latest", "alternate"] = "oldest" + ecl: ECLSessionProperties = ECLSessionProperties() diff --git a/cryosparc/models/session_config_profile.py b/cryosparc/models/session_config_profile.py new file mode 100644 index 00000000..ab55c115 --- /dev/null +++ b/cryosparc/models/session_config_profile.py @@ -0,0 +1,28 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import List, Optional + 
+from pydantic import BaseModel, Field + +from .session import ExposureGroup, LiveComputeResources +from .session_params import LivePreprocessingParams + + +class SessionConfigProfile(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + title: str + created_by_user_id: str + last_applied_at: Optional[datetime.datetime] = None + compute_resources: Optional[LiveComputeResources] = None + exp_groups: List[ExposureGroup] = [] + session_params: dict = {} + + +class SessionConfigProfileBody(BaseModel): + title: str + compute_resources: Optional[LiveComputeResources] = None + exp_groups: List[ExposureGroup] = [] + session_params: Optional[LivePreprocessingParams] = None diff --git a/cryosparc/models/session_params.py b/cryosparc/models/session_params.py new file mode 100644 index 00000000..5ec2f2b1 --- /dev/null +++ b/cryosparc/models/session_params.py @@ -0,0 +1,95 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import Literal, Optional + +from pydantic import BaseModel + + +class LivePreprocessingParams(BaseModel): + gainref_flip_x: bool = False + gainref_flip_y: bool = False + gainref_rotate_num: int = 0 + psize_A: float = 0.0 + accel_kv: float = 0.0 + cs_mm: float = 0.0 + total_dose_e_per_A2: float = 0.0 + phase_plate: bool = False + neg_stain: bool = False + eer_upsampfactor: int = 2 + eer_numfractions: int = 40 + motion_res_max_align: float = 5 + bfactor: float = 500 + frame_start: int = 0 + frame_end: Optional[int] = None + output_fcrop_factor: float = 1 + override_total_exp: Optional[float] = None + variable_dose: bool = False + smooth_lambda_cal: float = 0.5 + motion_override_K_Z: Optional[int] = None + motion_override_K_Y: Optional[int] = None + motion_override_K_X: Optional[int] = None + 
optimize_for_gpu_memory: bool = False + output_f16: bool = False + amp_contrast: float = 0.1 + ctf_res_min_align: float = 25 + ctf_res_max_align: float = 4 + df_search_min: float = 1000 + df_search_max: float = 40000 + do_phase_shift_search_refine: bool = False + phase_shift_min: float = 0 + phase_shift_max: float = 3.141592653589793 + do_phase_shift_refine_only: bool = False + ctf_override_K_Y: Optional[int] = None + ctf_override_K_X: Optional[int] = None + classic_mode: bool = False + current_picker: Literal["blob", "template", "deep"] = "blob" + blob_diameter_min: float = 0.0 + blob_diameter_max: float = 0.0 + use_circle: bool = True + use_ellipse: bool = False + use_ring: bool = False + blob_lowpass_res_template: float = 20 + blob_lowpass_res: float = 20 + blob_angular_spacing_deg: float = 5 + blob_use_ctf: bool = False + blob_min_distance: float = 1.0 + blob_num_process: Optional[int] = None + blob_num_plot: int = 10 + blob_max_num_hits: int = 4000 + template_diameter: Optional[float] = None + template_lowpass_res_template: float = 20 + template_lowpass_res: float = 20 + template_angular_spacing_deg: float = 5 + template_use_ctf: bool = True + template_min_distance: float = 0.5 + template_num_process: Optional[int] = None + template_num_plot: int = 10 + template_max_num_hits: int = 4000 + templates_from_job: Optional[str] = None + templates_selected: Optional[str] = None + thresh_score_min: Optional[float] = None + thresh_power_min: Optional[float] = None + thresh_power_max: Optional[float] = None + box_size_pix: int = 0 + bin_size_pix: Optional[int] = None + extract_f16: bool = False + do_plotting: bool = False + + +class LiveAbinitParams(BaseModel): + abinit_symmetry: str = "C1" + abinit_K: int = 1 + abinit_num_particles: Optional[int] = None + + +class LiveClass2DParams(BaseModel): + class2D_K: int = 50 + class2D_max_res: int = 6 + class2D_window_inner_A: Optional[float] = None + compute_use_ssd: bool = True + psize_mic: Optional[float] = None + + +class 
LiveRefineParams(BaseModel): + refine_symmetry: str = "C1" + psize_mic: Optional[float] = None diff --git a/cryosparc/models/session_spec.py b/cryosparc/models/session_spec.py new file mode 100644 index 00000000..2dd409d4 --- /dev/null +++ b/cryosparc/models/session_spec.py @@ -0,0 +1,5 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from typing import Literal + +SessionStatus = Literal["paused", "running", "completed", "compacting", "compacted", "restoring", "restoration_failed"] diff --git a/cryosparc/models/signature.py b/cryosparc/models/signature.py new file mode 100644 index 00000000..331abbb1 --- /dev/null +++ b/cryosparc/models/signature.py @@ -0,0 +1,13 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +from pydantic import BaseModel + + +class ImportSignature(BaseModel): + """ + Binary signatures of imported paths used in import jobs and sessions. + Meant to analyze unique imports across projects, but currently unused. 
+ """ + + count: int = 0 + signatures: str = "" diff --git a/cryosparc/models/tag.py b/cryosparc/models/tag.py new file mode 100644 index 00000000..81a74d1b --- /dev/null +++ b/cryosparc/models/tag.py @@ -0,0 +1,44 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import Literal, Optional + +from pydantic import BaseModel, Field + + +class TagCounts(BaseModel): + total: int = 0 + project: int = 0 + workspace: int = 0 + session: int = 0 + job: int = 0 + + +class Tag(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + uid: str + title: str + type: Literal["general", "project", "workspace", "session", "job"] + created_by_user_id: str + colour: Optional[ + Literal[ + "black", + "gray", + "red", + "orange", + "yellow", + "green", + "teal", + "cyan", + "sky", + "blue", + "indigo", + "purple", + "pink", + ] + ] = "gray" + description: Optional[str] = None + created_by_workflow: Optional[str] = None + counts: TagCounts = TagCounts() diff --git a/cryosparc/models/user.py b/cryosparc/models/user.py new file mode 100644 index 00000000..7014d53b --- /dev/null +++ b/cryosparc/models/user.py @@ -0,0 +1,81 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import Dict, List, Literal, Optional, Union + +from pydantic import BaseModel, Field + + +class Email(BaseModel): + address: str + verified: bool = False + + +class LoginToken(BaseModel): + hashedToken: str + when: datetime.datetime + + +class LoginService(BaseModel): + loginTokens: List[LoginToken] = [] + + +class PasswordService(BaseModel): + bcrypt: str + + +class Profile(BaseModel): + name: Union[str, dict] = "" + + +class RecentJob(BaseModel): + project_uid: str + 
workspace_uid: str + job_uid: str + + +class RecentSession(BaseModel): + project_uid: str + session_uid: str + + +class RecentWorkspace(BaseModel): + project_uid: str + workspace_uid: str + + +class Services(BaseModel): + password: Optional[PasswordService] = None + resume: LoginService = LoginService() + + +class UserState(BaseModel): + licenseAccepted: bool = False + userCanSetJobPriority: bool = False + userCanModifyLiveDataManagement: bool = False + recentProjects: List[str] = [] + recentWorkspaces: List[RecentWorkspace] = [] + recentSessions: List[RecentSession] = [] + recentJobs: List[RecentJob] = [] + browserPath: Optional[str] = None + defaultJobPriority: int = 0 + + +class User(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + emails: List[Email] + created_by_user_id: Optional[str] = None + name: str + first_name: str + last_name: str + status: str = "invited" + profile: Profile = Profile() + roles: Dict[str, List[Literal["user", "admin"]]] = {} + register_token: Optional[str] = None + reset_token: Optional[str] = None + services: Services = Services() + state: UserState = UserState() + allowed_prefix_dir: str = "/" + lanes: List[str] = [] diff --git a/cryosparc/models/workspace.py b/cryosparc/models/workspace.py new file mode 100644 index 00000000..9e8b8625 --- /dev/null +++ b/cryosparc/models/workspace.py @@ -0,0 +1,49 @@ +# THIS FILE IS AUTO-GENERATED, DO NOT EDIT DIRECTLY +# SEE dev/api_generate_models.py +import datetime +from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional + +from pydantic import BaseModel, ConfigDict, Field + + +class WorkspaceStats(BaseModel): + updated_at: Optional[datetime.datetime] = None + job_count: int = 0 + job_sections: Dict[str, int] = {} + job_status: Dict[str, int] = {} + job_types: 
Dict[str, int] = {} + + +class WorkspaceLastAccessed(BaseModel): + name: str + accessed_at: datetime.datetime + + +class Workspace(BaseModel): + id: str = Field("000000000000000000000000", alias="_id") + updated_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + created_at: datetime.datetime = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc) + dumped_at: Optional[datetime.datetime] = None + last_dumped_version: Optional[str] = None + autodump: bool = True + uid: str + project_uid: str + created_by_user_id: Optional[str] = None + title: Optional[str] = None + description: Optional[str] = None + created_by_job_uid: Optional[str] = None + tags: List[str] = [] + starred_by: List[str] = [] + deleted: bool = False + last_accessed: Optional[WorkspaceLastAccessed] = None + workspace_stats: WorkspaceStats = WorkspaceStats() + notes: str = "" + notes_lock: Optional[str] = None + imported_at: Optional[datetime.datetime] = None + workspace_type: Literal["base", "live"] = "base" + + model_config = ConfigDict(extra="allow") + if TYPE_CHECKING: + + def __init__(self, **kwargs: Any) -> None: ... + def __getattr__(self, key: str) -> Any: ... diff --git a/cryosparc/registry.py b/cryosparc/registry.py new file mode 100644 index 00000000..b8d35303 --- /dev/null +++ b/cryosparc/registry.py @@ -0,0 +1,127 @@ +""" +Model registration functions used by API client to determine how to interpret +JSON responses. Used for either cryosparc-tools or cryosparc models. 
+""" + +import re +import warnings +from enum import Enum +from inspect import isclass +from types import ModuleType +from typing import Dict, Iterable, Optional, Type + +from pydantic import BaseModel + +from .stream import Streamable + +FINALIZED: bool = False +REGISTERED_TYPED_DICTS: Dict[str, Type[dict]] = {} +REGISTERED_ENUMS: Dict[str, Type[Enum]] = {} +REGISTERED_MODEL_CLASSES: Dict[str, Type[BaseModel]] = {} +REGISTERED_STREAM_CLASSES: Dict[str, Type[Streamable]] = {} + + +def finalize(): + """ + Prevent registering additional types. Cannot be called twice. + """ + global FINALIZED + check_finalized(False) + FINALIZED = True + + +def check_finalized(finalized: bool = True): + """ + Ensure the register has or hasn't been finalized. This is used in + special contexts such as cryosparcm icli or Jupyter Notebooks where + cryosparc-tools may be used alongside an API client. + """ + assert FINALIZED is finalized, ( + f"Cannot proceed because registry is {'finalized' if FINALIZED else 'not finalized'}. " + "This likely means that you're using both cryosparc-tools AND the " + "CryoSPARC API client from client/api_client.py. Please use either " + "`CryoSPARC` from tools or `APIClient` from cryosparc, but not both." 
+ ) + + +def register_model(name, model_class: Type[BaseModel]): + check_finalized(False) + REGISTERED_MODEL_CLASSES[name] = model_class + + +def register_typed_dict(name, typed_dict_class: Type[dict]): + check_finalized(False) + REGISTERED_TYPED_DICTS[name] = typed_dict_class + + +def register_enum(name, enum_class: Type[Enum]): + check_finalized(False) + REGISTERED_ENUMS[name] = enum_class + + +def register_model_module(mod: ModuleType): + for key, val in mod.__dict__.items(): + if not re.match(r"^[A-Z]", key) or not isclass(val): + continue + if issubclass(val, BaseModel): + register_model(key, val) + if issubclass(val, dict): + register_typed_dict(key, val) + if issubclass(val, Enum): + register_enum(key, val) + + +def model_for_ref(schema_ref: str) -> Optional[Type]: + """ + Given a string with format either `#/components/schemas/X` or + `#/components/schemas/X_Y_`, looks up key X in `REGISTERED_MODEL_CLASSES`, + and returns either X or X[Y] depending on whether the string includes the + final Y component. + + Returns None if ref is not found. 
+ """ + components = schema_ref.split("/") + if len(components) != 4 or components[0] != "#" or components[1] != "components" or components[2] != "schemas": + warnings.warn(f"Warning: Invalid schema reference {schema_ref}", stacklevel=2) + return + + schema_name = components[3] + if "_" in schema_name: # type var + generic, var, *_ = schema_name.split("_") + if generic in REGISTERED_MODEL_CLASSES and var in REGISTERED_MODEL_CLASSES: + return REGISTERED_MODEL_CLASSES[generic][REGISTERED_MODEL_CLASSES[var]] # type: ignore + elif schema_name in REGISTERED_MODEL_CLASSES: + return REGISTERED_MODEL_CLASSES[schema_name] + elif schema_name in REGISTERED_TYPED_DICTS: + return REGISTERED_TYPED_DICTS[schema_name] + elif schema_name in REGISTERED_ENUMS: + return REGISTERED_ENUMS[schema_name] + + warnings.warn(f"Warning: Unknown schema reference model {schema_ref}", stacklevel=2) + + +def is_streamable_mime_type(mime: str): + return mime in REGISTERED_STREAM_CLASSES + + +def register_stream_class(stream_class: Type[Streamable]): + mime = stream_class.media_type + assert mime not in REGISTERED_STREAM_CLASSES, ( + f"Cannot register {stream_class}; " + f"stream class with mime-type {mime} is already registered " + f"({REGISTERED_STREAM_CLASSES[mime]})" + ) + REGISTERED_STREAM_CLASSES[mime] = stream_class + + +def get_stream_class(mime: str): + return REGISTERED_STREAM_CLASSES.get(mime) # returns None if mime-type is not registered + + +def streamable_mime_types(): + return set(REGISTERED_STREAM_CLASSES.keys()) + + +def first_streamable_mime(strs: Iterable[str]) -> Optional[str]: + mimes = streamable_mime_types() & set(strs) + return mimes.pop() if len(mimes) > 0 else None diff --git a/cryosparc/spec.py b/cryosparc/spec.py index 0e05777c..32bb2acb 100644 --- a/cryosparc/spec.py +++ b/cryosparc/spec.py @@ -1,35 +1,34 @@ """ -Type specifications for CryoSPARC database entities. 
- -Unless otherwise noted, classes defined here represent dictionary instances -whose attributes may be accessed with dictionary key syntax. +Specification for various utility types used throughout tools and CryoSPARC. +""" -Examples: +from typing import Dict, List, Literal, Tuple, TypedDict, Union - Accessing job document details +Shape = Tuple[int, ...] +"""A numpy shape tuple from ndarray.shape""" - >>> cs = CryoSPARC() - >>> job = cs.find_job("P3", "J118") - >>> job.doc["output_results"][0]["metafiles"] - [ - "J118/J118_000_particles.cs", - "J118/J118_001_particles.cs", - "J118/J118_002_particles.cs", - "J118/J118_003_particles.cs" - ] +DType = Union[str, Tuple[str, Shape]] """ -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Any, Dict, Generic, List, Optional, Tuple, TypeVar, Union + Can just be a single string such as "f4", "3u4" or "O". + A datatype description of a ndarray entry. -from typing_extensions import Literal, TypedDict + Can also be a tuple with a string datatype name and its shape. For + example, the following dtypes are equivalent. 
-if TYPE_CHECKING: - from typing_extensions import Self # not present in typing-extensions=3.7 + - "3u4" + - ">> print(details['filename']) - image.png - """ - - _id: str - """Document ID""" - - filename: str - """File name""" - - contentType: AssetContentType - """Asset content type, e.g., "image/png" """ +MICROGRAPH_CONTENT_TYPES: Dict[MicrographFormat, MicrographContentType] = { + "mrc": "image/x-mrc", + "mrcs": "image/x-mrc", + "stk": "image/x-mrc", + "tif": "image/tiff", + "tiff": "image/tiff", + "eer": "application/x-eer", + "ecc": "application/x-eer", + "bz2": "application/x-bzip2", + "cmrcbz2": "application/x-bzip2", +} - uploadDate: str # ISO formatted - """ISO 8601-formatted asset upload date""" +DATASET_CONTENT_TYPES: Dict[DatasetFormat, DatasetContentType] = { + "cs": "application/x-cryosparc-dataset", # but usually has numpy format internally + "npy": "application/x-numpy", +} - length: int # in bytes - """Size of file in bytes""" +ASSET_CONTENT_TYPES: Dict[AssetFormat, AssetContentType] = {**TEXT_CONTENT_TYPES, **IMAGE_CONTENT_TYPES} +ASSET_EXTENSIONS: Dict[AssetContentType, AssetFormat] = {v: k for k, v in ASSET_CONTENT_TYPES.items()} - chunkSize: int # in bytes - """File chunk size in bytes""" +KNOWN_CONTENT_TYPES: Dict[AnyFormat, AnyContentType] = { + **TEXT_CONTENT_TYPES, + **IMAGE_CONTENT_TYPES, + **MICROGRAPH_CONTENT_TYPES, + **DATASET_CONTENT_TYPES, +} - md5: str - """MD5 hash of asset""" - project_uid: str - """Associated project UID""" +# Slot is defined in two classes like this because it's the only way to +# make the ``required`` key optional. +class _Slot(TypedDict): + """ + :meta private: + """ - job_uid: str # also used for Session UID - """Associated job or session UID""" + name: str + dtype: str -class EventLogAsset(TypedDict): +class Slot(_Slot, total=False): """ - Dictionary item in a job event log's ``imgfiles`` property (in the - ``events`` collection). Keys may be accessed with dictionary key syntax. 
+ Full slot dictionary specification type for items in the slots=... argument + when creating inputs or outputs. e.g., ``{"name": "ctf", "dtype": "ctf"}`` + or ``{"name": "background_blob", "dtype": "stat_blob", "required": False}`` - Examples: + See :py:type:`SlotSpec` for details. - >>> print(asset['filename']) - image.png - """ + Attributes: + name (str): where to find field in a corresponding .cs file e.g., + ``"background_blob"``, ``"ctf"``, ``"alignments_class_0"`` + dtype (str): name of known data type. e.g., ``"stat_blob"``, ``"ctf"``, + ``"alignments3D"``. + required (bool, optional): Whether this slot is required. Applies to + input specs only. Defaults to True. - fileid: str - """Reference to file ``_id`` property in GridFS collection""" - - filename: str - """File name""" + """ - filetype: AssetContentType - """File content type, e.g., "image/png" """ + required: bool class Datafield(TypedDict): """ - Definition of a prefix field within a CS file. - - Examples: - - >>> field = Datafield(dtype='alignments3D', prefix='alignments_class_0', required=False) - >>> print(field['dtype']) - alignments3D + Deprecated. Use :py:class:`Slot` instead. """ dtype: str - """Datatype-specific string from based on entry in - ``cryosparc_compute/jobs/common.py``. e.g., "movie_blob", "ctf", - "alignments2D".""" - prefix: str - """where to find field in an associated ``.cs`` file. e.g., - "alignments_class_1" """ - required: bool - """whether this field must necessarily exist in a corresponding - input/output. Assumed to be ``True`` if not specified""" -SlotSpec = Union[str, Datafield] -""" -A result slot specification for the slots=... argument. +SlotSpec = Union[str, Slot, Datafield] """ +A result slot specification for items in the slots=... argument when creating +inputs or outputs. +In CryoSPARC, all jobs have one or more inputs and outputs. An input or output +has some broad :py:type:`Datatype`, such as ``"exposure"`` or ``"particle"``. 
+Each input or output also has a list of associated "low-level" results created +at various stages of processing, such as ``"location"`` for picked particles and +``blob`` for extracted particles. A slot represents one of these low-level +results. -class InputSlot(TypedDict): - """ - Dictionary entry in Job document's ``input_slot_groups.slots`` property. - """ - - type: Datatype - """Cryo-EM native data type, e.g., "exposure", "particle" or "volume" """ - - name: str - """Input slot name, e.g., "movie_blob" or "location" """ - - title: str - """Human-readable input slot title""" - - description: str - """Human-readable description""" - - optional: bool - """If True, input is not required for the job""" - - -class ConnectionSlot(TypedDict): - """ - Slots specified entry in a Job document's ``input_slot_groups[].connections[].slots`` list. - """ - - slot_name: Optional[str] - """Passthrough slots have ``slot_name`` set to ``None``.""" - - job_uid: str - """Parent job UID source of this input slot connection.""" - - group_name: str - """Name of output group in parent job. e.g., "particles" """ - - result_name: str - """Name of output slot in parent job, e.g., "blob" """ - - result_type: str - """Type of result slot based on entry in ``cryosparc_compute/jobs/common.py``, e.g., "particle.blob" """ - - version: Union[int, Literal["F"]] - """Version number or specifier to use. Usually "F" """ - - -class Connection(TypedDict): - """ - Connection element specified in a Job document's ``input_slot_groups[].connections`` list. - """ - - job_uid: str - """Parent job UID source of main input group connection.""" - - group_name: str - """Name of output group in parent job. 
e.g., "particles" """ - - slots: List[ConnectionSlot] - """List of connection specifiers for each slot""" - - -class InputSlotGroup(TypedDict): - """Element specified in a Job document's ``input_slot_groups`` list.""" - - type: Datatype - """Possible Cryo-EM data type for this group, e.g., "particle".""" - - name: str - """Input group name, e.g., "particles".""" - - title: str - """Human-readable input group title.""" - - description: str - """Human-readable input group description.""" - - count_min: int - """Minimum required output groups that may be connected to this input slot.""" - - count_max: Optional[int] - """Maximum allowed output groups that may be connected to this input slot. Infinity if not specified.""" - - repeat_allowed: bool - """If True, the same output group may be connected twice.""" - - slots: List[InputSlot] - """List of slot definitions in the input group.""" - - connections: List[Connection] - """Connected output for this input group.""" - - -class OutputResultGroupContains(TypedDict): - """ - Elements of a Job document's ``output_result_groups[].contains`` list. - """ - - uid: str - """Result unique ID, e.g., "J42-R1".""" - - type: str - """Result type based on entry in ``cryosparc_compute/jobs/common.py``, e.g., "particle.alignments3D".""" - - name: str - """Name of output result (a.k.a. slot), e.g., "alignments_class_1".""" - - group_name: str - """Name of output group, e.g., "particles".""" - - passthrough: bool - """If True, this result is passed through as-is from an associated input.""" - - -class OutputResultGroup(TypedDict): - """ - Elements of a Job document's ``output_result_groups`` list. - """ - - uid: str - """Ouptut group unique ID, e.g., "J42-G1".""" +In the CryoSPARC interface, open a job's "Inputs" or "Outputs" tab to see the +kinds of slots available. 
You may also download an output and load it with +:py:type:`~cryosparc.dataset.Dataset` to inspect the information encoded in its +results. 
- type: Datatype - """Possible Cryo-EM data type for this group, e.g., "particle".""" +Provide each slot as either a string representing a name and result type, or a +full dictionary specification. - name: str - """Output group name, e.g., "particles_selected" """ - - title: str - """Human-readable output group title.""" - - description: str - """Human-readable output group description.""" - - contains: List[OutputResultGroupContains] - """List of specific results (a.k.a. slots) in this output group.""" - - passthrough: Union[str, Literal[False]] - """Either ``False`` if this is a newly-created output or the name of an - input group used to forward passthrough slots for this result group.""" - - num_items: int - """Number of rows in the dataset for this result group populated by jobs when they run.""" - - summary: dict - """Context-specific details about this result populated by jobs when they run.""" - - -class OutputResult(TypedDict): - """ - Detailed schema and metadata for a Job document's ``output_results`` list. - Similar to a flattened ``output_result_groups[].contains`` but with more - details. - """ - - uid: str - """Result unique ID, e.g., "J42-R1".""" - - type: str - """Result type based on entry in ``cryosparc_compute/jobs/common.py``, e.g., "particle.alignments3D".""" - - name: str - """Name of output result (a.k.a. slot), e.g., "alignments_class_1".""" - - group_name: str - """Name of output group, e.g., "particles".""" - - title: str - """Human-readable output result title.""" - - description: str - """Human-readable output result description.""" +A string in the format ``"<slot>"`` is a shortcut for ``{"name": "<slot>", +"dtype": "<slot>", "required": True}``. - versions: List[int] - """List of available intermediate result version numbers.""" +A string in the format ``"?<slot>"`` is a shortcut for ``{"name": "<slot>", +"dtype": "<slot>", "required": False}`` (input slots only). 
- metafiles: List[str] - """List of available intermediate result files (same size as ``versions``).""" +Example strings:: - min_fields: List[Tuple[str, str]] - """Minimum included dataset field definitions in this result.""" + "ctf" + "micrograph_blob" + "?background_blob" - num_items: int - """Number of rows in the dataset for this result populated by jobs when they run.""" +Example equivalent full specifications:: - passthrough: bool - """If True, this result is passed through as-is from an associated input.""" + {"name": "ctf", "dtype": "ctf"} + {"name": "micrograph_blob", "dtype": "micrograph_blob", "required": True} + {"name": "background_blob", "dtype": "stat_blob", "required": False} +Use the full specification when the ``dtype`` cannot be inferred from the +``name`` string because it is dynamic. For example, 3D Variability job +``particles`` outputs have slots named ``"components_mode_X"`` with dtype +``"components"`` where ``X`` is a mode number:: -class BaseParam(TypedDict): - """ - Base parameter specification. - """ - - value: bool - """Base parameter value. Should not be changed.""" - - title: str - """Human-readable parameter title.""" - - desc: str - """Human-readable parameter description.""" - - order: int - """Parameter order in the builder list.""" - - section: str - """Parameter section identifier.""" - - advanced: bool - """True if this is an advanced parameter (hidden unlesss the "Advanced" - checkbox is enabled in the Job Builder".""" - - hidden: bool - """If True, this parameter is always hidden from the interface.""" - - -class Param(BaseParam): - """ - Specifies possible values for type property. Inherits from - BaseParam_. - - .. _BaseParam: - #cryosparc.spec.BaseParam - """ - - type: Literal["number", "string", "boolean"] - """Possible Parameter type.""" - - -class EnumParam(BaseParam): - """ - Additional Param keys available for enum params. Inherits from BaseParam_. - - .. 
_BaseParam: - #cryosparc.spec.BaseParam - """ - - type: Literal["enum"] - """Possible Parameter type.""" - - enum_keys: List[str] - """Possible enum names for display for selection. Parameter must be set to - one of these values.""" - - enum_dict: Dict[str, Any] - """Map from enum key names to their equivalent values.""" - - -class PathParam(BaseParam): - """ - Additional Param key available for path params. Inherits Inherits from - BaseParam_. - - .. _BaseParam: - #cryosparc.spec.BaseParam - """ - - type: Literal["path"] - - path_dir_allowed: bool - """If True, directories may be specified.""" - - path_file_allowed: bool - """If True, files may be specified.""" - - path_glob_allowed: bool - """If True, a wildcard string that refers to many files may be specified..""" - - -class ParamSpec(TypedDict): - """Param specification. Dictionary with single ``"value"`` key.""" - - value: Any - """Value of param.""" - - -class ParamSection(TypedDict): - """Param section specification""" - - title: str - """Parameter section title""" - desc: str - """Parameter section description""" - order: int - """Order for this parameter section to appear in the job builder""" - - -class ProjectLastAccessed(TypedDict, total=False): - """ - Details on when a project was last accessed. - """ - - name: str - """User account name that accessed this project.""" - - accessed_at: str - """Last access date in ISO 8601 format.""" - - -class ProjectDocument(TypedDict): - """ - Specification for a project document in the MongoDB database. - """ - - _id: str - """MongoDB ID""" - - uid: str - """Project unique ID, e.g., "J42".""" - - uid_num: int - """Project number, e.g., 42.""" - - title: str - """Human-readable Project title.""" - - description: str - """Human-readable project markdown description.""" - - project_dir: str - """Project directory on disk. 
May include unresolved shell variables.""" - - project_params_pdef: dict - """Project-level job parameter default definitions.""" - - owner_user_id: str - """Object ID of user account that created this project.""" - - created_at: str - """Project creation date in ISO 8601 format.""" - - deleted: bool - """Whether this project has been deleted from the interface.""" - - users_with_access: List[str] - """Object IDs of user accounts that may access this project.""" - - size: int - """Computed size of project on disk.""" - - last_accessed: ProjectLastAccessed - """Details about when the project was last accessed by a user account.""" - - archived: bool - """Whether this project has been marked as archived from the inteface.""" - - detached: bool - """Whether this project is detached.""" - - hidden: bool - """Whether this project is hidden.""" - - project_stats: dict - """Computed project statistics.""" - - generate_intermediate_results_settings: dict - """Project settings for generating intermediate results for specific job types.""" - - -class JobDocument(TypedDict): - """ - Specification for a Job document from the MongoDB database. 
- """ - - _id: str - """MongoDB ID""" - - uid: str - """Job unique ID, e.g., "J42".""" - - uid_num: int - """Job number, e.g., 42.""" - - project_uid: str - """Project unique ID, e.g., "P3".""" - - project_uid_num: int - """Project number, e.g., 3.""" - - type: str - """Job type identifier, e.g., "class2d".""" - - job_type: str - """Alias for type key""" - - title: str - """Human-readable job title.""" - - description: str - """Human-readable job markdown description.""" - - status: JobStatus - """Job scheduling status, e.g., "building", "queued", "running".""" - - created_at: str - """Job creation date in ISO 8601 format.""" - - created_by_user_id: Optional[str] - """Object ID of user account that created this job.""" - - deleted: bool - """True if the job has been marked as deleted.""" - - parents: List[str] - """List of parent jobs UIDs based on input connections.""" - - children: List[str] - """List of child job UIDs based on output connections.""" - - input_slot_groups: List[InputSlotGroup] - """Input group specifications, including schema and connection information.""" - - output_result_groups: List[OutputResultGroup] - """Output group specifications.""" - - output_results: List[OutputResult] - """Aggregated output results specification (similar to - ``output_result_groups`` with additional field information).""" - - params_base: Dict[str, Union[Param, EnumParam, PathParam]] - """Job param specification and their base values. Each key represents a - parameter name.""" - - params_spec: Dict[str, ParamSpec] - """User-specified parameter values. Each key is a parameter value. Not all - keys from ``params_base`` are included here, only ones that were explicitly - set.""" - - params_secs: Dict[str, ParamSection] - """Parameter section definitions""" - - workspace_uids: List[str] - """List of workspace UIDs this job belongs to.""" - - -class WorkspaceDocument(TypedDict): - """ - Specification for a Workspace document from the MongoDB database. 
- Live-related fields are not yet included. - """ - - _id: str - """MongoDB ID""" - - uid: str - """Workspace unique ID, e.g., "W1".""" - - uid_num: int - """Workspace number, e.g., 1.""" - - project_uid: str - """Project unique ID, e.g., "P3".""" - - project_uid_num: int - """Project number, e.g., 3.""" - - created_at: str - """Workspace creation date in ISO 8601 format.""" - - created_by_user_id: str - """Object ID of user account that created this workspace.""" - - deleted: bool - """True if the workspace has been marked as deleted.""" - - title: str - """Human-readable workspace title.""" - - description: Optional[str] - """Human-readable workspace markdown description.""" - - workspace_type: Literal["base", "live"] - """Either "live" or "base". """ - - -class ResourceSlots(TypedDict): - """ - Listings of available resources on a worker node that may be allocated for - scheduling. - """ - - CPU: List[int] - """List of available CPU core indices.""" - GPU: List[int] - """List of available GPU indices.""" - RAM: List[int] - """List of available 8GB slots.""" - - -class FixedResourceSlots(TypedDict): - """ - Available resource slots that only indicate presence, not the amount that - may be allocated. (i.e., "SSD is available or not available") - """ - - SSD: bool - """Whether this target thas an SSD""" - - -class Gpu(TypedDict): - """ - GPU details for a target. - """ - - id: int - """Index of GPU. Generally based on which PCI slot the GPU occupies.""" - name: str - """Identifiable model name for this GPU, e.g.,"GeForce RTX 3090".""" - mem: int - """Amount of memory available on this GPU, in bytes.""" - - -class SchedulerLane(TypedDict): - """ - Description for a CryoSPARC scheduler lane. 
- """ - - name: str - """Identifier for this lane.""" - type: Literal["node", "cluster"] - """What kind of lane this is based on how on what kind of target(s) it contains.""" - title: str - """Human-readable lane title.""" - desc: str - """Human-readable lane description.""" - - -class BaseSchedulerTarget(TypedDict): - """ - Properties shared by both node and cluster scheduler targets. - """ - - lane: str - """Lane name this target belongs to.""" - - name: str - """Identifier for this target.""" - - title: str - """Human-readable title for this target.""" - - desc: Optional[str] - """Human-readable description for this target.""" - - hostname: str - """Network machine hostname (same as name for for clusters).""" - - worker_bin_path: str - """Path to cryosparc_worker/bin/cryosparcw executable.""" - - cache_path: Optional[str] - """Path the SSD cache scratch directory, if applicable.""" - - cache_reserve_mb: int # 10G default - """Ensure at least this much space is free on the SSD scratch drive before - caching.""" - - cache_quota_mb: int - """Do not cache more than this amoun on the SSD scrath drive..""" - - -class SchedulerTargetNode(BaseSchedulerTarget): - """ - node-type scheduler target that does not include GPUs. Inherits from - BaseSchedulerTarget_. - - .. _BaseSchedulerTarget: - #cryosparc.spec.BaseSchedulerTarget - """ - - type: Literal["node"] - """Node scheduler targets have type "node".""" - - ssh_str: str - """Shell command used to access this node, e.g., ``ssh cryosparcuser@worker``.""" - - resource_slots: ResourceSlots - """Available compute resources.""" - - resource_fixed: FixedResourceSlots - """Available fixed resources.""" - - monitor_port: Optional[int] - """Not used.""" - - -class SchedulerTargetGpuNode(SchedulerTargetNode): - """ - node-type scheduler target that includes GPUs. Inherits from - BaseSchedulerTarget_ and SchedulerTargetNode_. - - .. _BaseSchedulerTarget: - #cryosparc.spec.BaseSchedulerTarget - .. 
_SchedulerTargetNode: - #cryosparc.spec.SchedulerTargetNode - """ - - gpus: List[Gpu] - """Details about GPUs available on this node.""" - - -class SchedulerTargetCluster(BaseSchedulerTarget): - """ - Cluster-type scheduler targets. Inherits from BaseSchedulerTarget_. - - .. _BaseSchedulerTarget: - #cryosparc.spec.BaseSchedulerTarget - """ - - type: Literal["cluster"] - """Cluster scheduler targets have type "cluster".""" - - script_tpl: str - """Full cluster submission script Jinja template.""" - - send_cmd_tpl: str - """Template command to access the cluster and running commands.""" - - qsub_cmd_tpl: str - """Template command to submit jobs to the cluster.""" - - qstat_cmd_tpl: str - """Template command to check the cluster job by its ID.""" - - qdel_cmd_tpl: str - """Template command to delete cluster jobs.""" - - qinfo_cmd_tpl: str - """Template command to check cluster queue info.""" - + [ + "blob", + "?locations", + {"name": "components_mode_0", "dtype": "components"}, + {"name": "components_mode_1", "dtype": "components", "required": False}, + {"name": "components_mode_2", "dtype": "components", "required": False}, + ] -SchedulerTarget = Union[SchedulerTargetNode, SchedulerTargetGpuNode, SchedulerTargetCluster] +Note that the ``required`` key only applies to input slots. """ -Scheduler target details. -""" - - -class JobSpec(TypedDict): - """ - Specification for a Job document from the CryoSPARC's job register. 
- """ - - name: str - """Job's machine-readable type, e.g., 'homo_abinit'.""" - title: str - """Job's human-readable name, e.g., 'Ab-Initio Reconstruction'.""" - shorttitle: str - """Short-version of name, e.g., 'Ab-Initio'.""" - description: str - """Detailed description of job type""" - - input_slot_groups: List[InputSlotGroup] - """Description of available inputs.""" - params_base: Dict[str, Union[Param, EnumParam, PathParam]] - """Description of available parameters.""" - params_secs: Dict[str, ParamSection] - """Description of parameter sections.""" - - is_interactive: bool - """If True, this job is requires interaction. "Curate Exposures" and "Select - 2D Classes" are examples of interactive jobs.""" - is_lightweight: bool - """If True, does job does not require GPUs and requires few-enough - resources that it can usually run directly on the master machine.""" - hidden: bool - """If True, job is not visible in the interface.""" - develop_only: bool - """If True, job is in development and not available to run.""" class JobSection(TypedDict): """ - Specification of available job types of a certain category. - - Examples: - - >>> { - ... "name": "refinement", - ... "title": "3D Refinement", - ... "description: "...", - ... "contains" : [ - ... "homo_refine", - ... "hetero_refine", - ... "nonuniform_refine", - ... "homo_reconstruct" - ... ] - ... } + Deprecated. Use :py:class:`~cryosparc.models.job_register.JobRegister` + instead. """ name: str - """Section identifier.""" title: str - """Human-readable section title.""" description: str - """Human-readable section description.""" contains: List[str] - """List of available job types in this category""" - - -class JobSpecSection(TypedDict): - """ - Similar to JobSection_, except each item in ``contains`` is a detailed - JobSpec_. - - - .. _JobSection: - #cryosparc.spec.JobSection - .. 
_JobSpec: - #cryosparc.spec.JobSpec - """ - - name: str - """Section identifier.""" - title: str - """Human-readable section title.""" - description: str - """Human-readable section description.""" - contains: List[JobSpec] - """List of job details available in this category""" - - -class MongoController(ABC, Generic[D]): - """ - Abstract base class for Project, Workspace, Job classes and any other types - that have underlying Mongo database documents. - - Generic type argument D is a typed dictionary definition for a Mongo - document. - - :meta private: - """ - - _doc: Optional[D] = None - - @property - def doc(self) -> D: - if not self._doc: - self.refresh() - assert self._doc, "Could not refresh database document" - return self._doc - - @abstractmethod - def refresh(self) -> "Self": - # Must be implemented in subclasses - return self diff --git a/cryosparc/star.py b/cryosparc/star.py index 0c6d2b6e..b802f583 100644 --- a/cryosparc/star.py +++ b/cryosparc/star.py @@ -3,10 +3,23 @@ """ from pathlib import PurePath -from typing import IO, TYPE_CHECKING, Any, Callable, Dict, List, Mapping, Optional, Tuple, Type, Union, overload +from typing import ( + IO, + TYPE_CHECKING, + Any, + Callable, + Dict, + List, + Literal, + Mapping, + Optional, + Tuple, + Type, + Union, + overload, +) import numpy as n -from typing_extensions import Literal if TYPE_CHECKING: from numpy.typing import NDArray # type: ignore diff --git a/cryosparc/stream.py b/cryosparc/stream.py index b1ab6d13..07b8a393 100644 --- a/cryosparc/stream.py +++ b/cryosparc/stream.py @@ -5,33 +5,62 @@ from typing import ( IO, TYPE_CHECKING, - Any, - AsyncGenerator, AsyncIterator, + Awaitable, BinaryIO, - Generator, Iterator, Optional, + Protocol, Union, + overload, ) -from typing_extensions import Protocol - if TYPE_CHECKING: - from typing_extensions import Self # not present in typing-extensions=3.7 + from typing_extensions import Buffer, Self + +from .constants import EIGHT_MIB +from .util import bopen + + 
+class AsyncReadable(Protocol): + """Any object that has an async read(size) method""" + + def read(self, size: int = ..., /) -> Awaitable[bytes]: ... + + +class AsyncWritable(Protocol): + """Any object that has an async write(buffer) method""" + + def write(self, b: "Buffer", /) -> Awaitable[int]: ... + + +class AsyncBinaryIterator(Protocol): + """ + Any object that asynchronously yields bytes when iterated e.g.:: + async for chunk in obj: + print(chunk.decode()) + """ -class AsyncBinaryIO(Protocol): - async def read(self, n: Optional[int] = None) -> bytes: ... + def __aiter__(self) -> AsyncIterator[bytes]: ... + def __anext__(self) -> Awaitable[bytes]: ... -class BinaryIteratorIO(BinaryIO): +class BinaryIteratorIO(BinaryIO, Iterator[bytes]): """Read through a iterator that yields bytes as if it was a file""" - def __init__(self, iter: Union[Iterator[bytes], Generator[bytes, Any, Any]]): + def __init__(self, iter: Iterator[bytes]): self._iter = iter self._left = b"" + def __iter__(self): + assert not self._left, "Cannot iterate over a stream that has already been read" + return iter(self._iter) + + def __next__(self): + assert not self._left, "Cannot iterate over a stream that has already been read" + return next(self._iter) + def readable(self): return True @@ -66,13 +95,21 @@ def read(self, n: Optional[int] = None): return b"".join(out) -class AsyncBinaryIteratorIO(AsyncBinaryIO): +class AsyncBinaryIteratorIO(AsyncReadable, AsyncBinaryIterator, AsyncIterator[bytes]): """Similar to BinaryIteratorIO except the iterator yields bytes asynchronously""" - def __init__(self, iter: Union[AsyncIterator[bytes], AsyncGenerator[bytes, Any]]): + def __init__(self, iter: AsyncBinaryIterator): self._iter = iter self._left = b"" + def __aiter__(self): + assert not self._left, "Cannot iterate over a stream that has already been read" + return self._iter.__aiter__() + + def __anext__(self): + assert not self._left, "Cannot iterate over a stream that has already been read" + 
return self._iter.__anext__() + def readable(self): return True @@ -93,7 +130,7 @@ async def read(self, n: Optional[int] = None): out = [] if n is None or n < 0: while True: - m = self._read1() + m = await self._read1() if not m: break out.append(m) @@ -108,13 +145,11 @@ async def read(self, n: Optional[int] = None): class Streamable(ABC): - @classmethod - def mime_type(cls) -> str: - """ - Return the binary mime type to use in HTTP requests when streaming this - data e.g., "application/x-cryosparc-dataset" - """ - return f"application/x-cryosparc-{cls.__name__.lower()}" + media_type = "application/octet-stream" + """ + May override in subclasses to derive correct stream type, e.g., + "application/x-cryosparc-dataset" + """ @classmethod def api_schema(cls): @@ -123,25 +158,26 @@ def api_schema(cls): instance in the request or response body. """ return { - "description": f"A binary stream representing a CryoSPARC {cls.__name__}", - "content": {cls.mime_type(): {"schema": {"title": cls.__name__, "type": "string", "format": "binary"}}}, + "description": f"A binary stream representing a {cls.__name__} class instance", + "content": {cls.media_type: {"schema": {"title": cls.__name__, "type": "string", "format": "binary"}}}, } @classmethod @abstractmethod - def load(cls, file: Union[str, PurePath, IO[bytes]]) -> "Self": + def load(cls, file: Union[str, PurePath, IO[bytes]], *, media_type: Optional[str] = None) -> "Self": """ - The given stream param must at least implement an async read method + Load stream from a file path or readable byte stream. The stream must + at least implement the `read(size)` function. """ ... 
@classmethod - def from_iterator(cls, source: Iterator[bytes]): - return cls.load(BinaryIteratorIO(source)) + def from_iterator(cls, source: Iterator[bytes], *, media_type: Optional[str] = None): + return cls.load(BinaryIteratorIO(source), media_type=media_type) @classmethod @abstractmethod - async def from_async_stream(cls, stream: AsyncBinaryIO) -> "Self": + async def from_async_stream(cls, stream: AsyncReadable, *, media_type: Optional[str] = None) -> "Self": """ Asynchronously load from the given binary stream. The given stream parameter must at least have ``async read(n: int | None) -> bytes`` method. @@ -149,12 +185,111 @@ async def from_async_stream(cls, stream: AsyncBinaryIO) -> "Self": ... @classmethod - async def from_async_iterator(cls, iterator: Union[AsyncIterator[bytes], AsyncGenerator[bytes, None]]): - return await cls.from_async_stream(AsyncBinaryIteratorIO(iterator)) + async def from_async_iterator(cls, iterator: AsyncBinaryIterator, *, media_type: Optional[str] = None): + return await cls.from_async_stream(AsyncBinaryIteratorIO(iterator), media_type=media_type) @abstractmethod - def stream(self) -> Generator[bytes, None, None]: ... + def stream(self) -> Iterator[bytes]: ... 
- async def astream(self): + async def astream(self) -> AsyncIterator[bytes]: for chunk in self.stream(): yield chunk + + def save(self, file: Union[str, PurePath, IO[bytes]]): + with bopen(file, "wb") as f: + self.dump(f) + + def dump(self, file: IO[bytes]): + for chunk in self.stream(): + file.write(chunk) + + def dumps(self) -> bytes: + return b"".join(self.stream()) + + async def adump(self, file: Union[IO[bytes], AsyncWritable]): + async for chunk in self.astream(): + result = file.write(chunk) + if isinstance(result, Awaitable): + await result + + async def adumps(self) -> bytes: + from io import BytesIO + + data = BytesIO() + await self.adump(data) + return data.getvalue() + + +class Stream(Streamable): + """ + Generic stream that leaves handling of the stream data to the caller. + May accept stream data in any streamable format, though async formats + must be consumed with async functions. + """ + + @overload + def __init__(self, *, stream: IO[bytes] = ..., media_type: Optional[str] = ...): ... + @overload + def __init__(self, *, iterator: Iterator[bytes] = ..., media_type: Optional[str] = ...): ... + @overload + def __init__(self, *, astream: AsyncReadable = ..., media_type: Optional[str] = ...): ... + @overload + def __init__(self, *, aiterator: AsyncBinaryIterator = ..., media_type: Optional[str] = ...): ...
+ def __init__( + self, + *, + stream: Optional[IO[bytes]] = None, + iterator: Optional[Iterator[bytes]] = None, + astream: Optional[AsyncReadable] = None, + aiterator: Optional[AsyncBinaryIterator] = None, + media_type: Optional[str] = None, + ): + if (stream is not None) + (iterator is not None) + (astream is not None) + (aiterator is not None) != 1: + raise TypeError("Exactly one of stream, iterator, astream or aiterator must be provided") + self._stream = stream + self._iterator = iterator + self._astream = astream + self._aiterator = aiterator + self.media_type = media_type or self.media_type + + @property + def asynchronous(self): + return (self._astream is not None) or (self._aiterator is not None) + + @classmethod + def load(cls, file: Union[str, PurePath, IO[bytes]], *, media_type: Optional[str] = None): + stream = open(file, "rb") if isinstance(file, (str, PurePath)) else file + return cls(stream=stream, media_type=media_type) + + @classmethod + def from_iterator(cls, source: Iterator[bytes], *, media_type: Optional[str] = None): + return cls(iterator=source, media_type=media_type) + + @classmethod + async def from_async_stream(cls, stream: AsyncReadable, *, media_type: Optional[str] = None): + return cls(astream=stream, media_type=media_type) + + @classmethod + async def from_async_iterator(cls, iterator: AsyncBinaryIterator, *, media_type: Optional[str] = None): + return cls(aiterator=iterator, media_type=media_type) + + def stream(self) -> Iterator[bytes]: + if self._stream: + while chunk := self._stream.read(EIGHT_MIB): + yield chunk + elif self._iterator: + for chunk in self._iterator: + yield chunk + else: + raise TypeError("This is an asynchronous stream, must use astream() instead") + + async def astream(self) -> AsyncIterator[bytes]: + if self._stream or self._iterator: + for chunk in self.stream(): + yield chunk + elif self._astream: + while chunk := await self._astream.read(EIGHT_MIB): + yield chunk + elif self._aiterator: + async for chunk in 
self._aiterator: + yield chunk diff --git a/cryosparc/stream_registry.py b/cryosparc/stream_registry.py new file mode 100644 index 00000000..5a5d751a --- /dev/null +++ b/cryosparc/stream_registry.py @@ -0,0 +1,6 @@ +from .dataset import Dataset +from .registry import register_stream_class +from .stream import Stream + +register_stream_class(Dataset) +register_stream_class(Stream) diff --git a/cryosparc/tools.py b/cryosparc/tools.py index 0369f189..b87994ff 100644 --- a/cryosparc/tools.py +++ b/cryosparc/tools.py @@ -22,36 +22,41 @@ import os import re import tempfile -from io import BytesIO -from pathlib import Path, PurePath, PurePosixPath -from typing import IO, TYPE_CHECKING, Any, Container, Dict, Iterable, List, Optional, Tuple, Union -from warnings import warn +import warnings +from contextlib import contextmanager +from functools import cached_property +from hashlib import sha256 +from io import BytesIO, TextIOBase +from pathlib import PurePath, PurePosixPath +from typing import IO, TYPE_CHECKING, Any, Container, Dict, Iterable, List, Optional, Tuple, Union, get_args import numpy as n -from .errors import InvalidSlotsError +from . 
import __version__, model_registry, mrc, registry, stream_registry +from .api import APIClient +from .controllers import as_output_slot +from .controllers.job import ExternalJobController, JobController +from .controllers.project import ProjectController +from .controllers.workspace import WorkspaceController +from .dataset import CSDAT_FORMAT, DEFAULT_FORMAT, Dataset +from .dataset.row import R +from .models.asset import GridFSFile +from .models.external import ExternalOutputSpec +from .models.job_register import JobRegister +from .models.job_spec import Category, OutputRef, OutputSpec +from .models.scheduler_lane import SchedulerLane +from .models.scheduler_target import SchedulerTarget +from .models.user import User +from .spec import Datatype, JobSection, SlotSpec +from .stream import BinaryIteratorIO, Stream +from .util import clear_cached_property, padarray, print_table, trimarray if TYPE_CHECKING: - from numpy.typing import NDArray # type: ignore - -from . import __version__, mrc -from .command import CommandClient, CommandError, make_json_request, make_request -from .dataset import DEFAULT_FORMAT, Dataset -from .job import ExternalJob, Job -from .project import Project -from .row import R -from .spec import ( - ASSET_EXTENSIONS, - AssetDetails, - Datatype, - JobSection, - JobSpecSection, - SchedulerLane, - SchedulerTarget, - SlotSpec, -) -from .util import bopen, noopcontext, padarray, print_table, trimarray -from .workspace import Workspace + from numpy.typing import NDArray + +assert stream_registry +assert model_registry +registry.finalize() # no more models may be registered after this ONE_MIB = 2**20 # bytes in one mebibyte @@ -79,29 +84,28 @@ class CryoSPARC: High-level session class for interfacing with a CryoSPARC instance. Initialize with the host and base port of the running CryoSPARC instance. - This host and (at minimum) ``base_port + 2``, ``base_port + 3`` and - ``base_port + 5`` should be accessible on the network. 
+ This host and (at minimum) ``base_port + 2`` should be accessible on the + network. Args: - license (str, optional): CryoSPARC license key. Defaults to - ``os.getenv("CRYOSPARC_LICENSE_ID")``. + base_url (str, optional): CryoSPARC instance URL, e.g., + "http://localhost:39000" or "https://cryosparc.example.com". + Same URL used to access CryoSPARC from a web browser. host (str, optional): Hostname or IP address running CryoSPARC master. - Defaults to ``os.getenv("CRYOSPARC_MASTER_HOSTNAME", "localhost")``. - base_port (int, optional): CryoSPARC services base port number. Defaults - to ``os.getenv("CRYOSPARC_MASTER_HOSTNAME", 39000)``. + Cannot be specified with ``base_url``. Defaults to + ``os.getenv("CRYOSPARC_MASTER_HOSTNAME")``. + base_port (int, optional): CryoSPARC services base port number. + Cannot be specified with ``base_url``. Defaults to + ``os.getenv("CRYOSPARC_BASE_PORT")``. email (str, optional): CryoSPARC user account email address. Defaults to ``os.getenv("CRYOSPARC_EMAIL")``. password (str, optional): CryoSPARC user account password address. Defaults to ``os.getenv("CRYOSPARC_PASSWORD")``. + license (str, optional): (Deprecated) CryoSPARC license key. Defaults to + ``os.getenv("CRYOSPARC_LICENSE_ID")``. timeout (int, optional): Timeout error for HTTP requests to CryoSPARC command services. Defaults to 300. - Attributes: - cli (CommandClient): HTTP/JSONRPC client for ``command_core`` service (port + 2). - vis (CommandClient): HTTP/JSONRPC client for ``command_vis`` service (port + 3). - rtp (CommandClient): HTTP/JSONRPC client for ``command_rtp`` service (port + 5). - user_id (str): Mongo object ID of user account performing operations for this session. - Examples: Load project job and micrographs @@ -132,59 +136,79 @@ class CryoSPARC: "J43" """ - cli: CommandClient - vis: CommandClient - rtp: CommandClient - user_id: str # session user ID + api: APIClient + """ + HTTP REST API client for ``api`` service (port + 2).
+ + base_url: str + """ + URL used for communication with the CryoSPARC instance REST API. + """ def __init__( self, - license: str = os.getenv("CRYOSPARC_LICENSE_ID", ""), - host: str = os.getenv("CRYOSPARC_MASTER_HOSTNAME", "localhost"), - base_port: int = int(os.getenv("CRYOSPARC_BASE_PORT", 39000)), - email: str = os.getenv("CRYOSPARC_EMAIL", ""), - password: str = os.getenv("CRYOSPARC_PASSWORD", ""), + base_url: Optional[str] = os.getenv("CRYOSPARC_BASE_URL"), + *, + host: Optional[str] = os.getenv("CRYOSPARC_MASTER_HOSTNAME"), + base_port: Union[int, str, None] = os.getenv("CRYOSPARC_BASE_PORT"), + email: Optional[str] = os.getenv("CRYOSPARC_EMAIL"), + license: Optional[str] = os.getenv("CRYOSPARC_LICENSE_ID"), + password: Optional[str] = os.getenv("CRYOSPARC_PASSWORD"), timeout: int = 300, ): - assert LICENSE_REGEX.fullmatch(license), f"Invalid or unspecified CryoSPARC license ID {license}" - assert email, "Invalid or unspecified email" - assert password, "Invalid or unspecified password" - - self.cli = CommandClient( - service="command_core", - host=host, - port=base_port + 2, - headers={"License-ID": license}, - timeout=timeout, - ) - self.vis = CommandClient( - service="command_vis", - host=host, - port=base_port + 3, - headers={"License-ID": license}, - timeout=timeout, - ) - self.rtp = CommandClient( - service="command_rtp", - host=host, - port=base_port + 5, - headers={"License-ID": license}, - timeout=timeout, - ) + if license: + warnings.warn( + "Support for license argument and CRYOSPARC_LICENSE_ID environment variable " + "will be removed in a future release", + DeprecationWarning, + stacklevel=2, + ) + if not LICENSE_REGEX.fullmatch(license): + raise ValueError(f"Invalid CryoSPARC license ID {license}") + + if host and base_port: + if base_url: + raise TypeError("Cannot specify host and base_port when base_url is specified") + self.base_url = f"http://{host}:{int(base_port) + 2}" + elif base_url: + self.base_url = f"{base_url}/api" # app forwards to
api service (TODO) + else: + raise TypeError("Must specify either base_url or host + base_port") + + auth = None + if email and password: + auth = (email, sha256(password.encode()).hexdigest()) + elif license: + auth = ("cryosparc", sha256(license.encode()).hexdigest()) + # TODO: also load auth from config profile + else: + raise ValueError( + "CryoSPARC authentication not provided. " + "Please see documentation at https://tools.cryosparc.com for instructions." + ) + + tools_major_minor_version = ".".join(__version__.split(".")[:2]) # e.g., 4.1.0 -> 4.1 try: - self.user_id = self.cli.get_id_by_email_password(email, password) # type: ignore - cs_version: str = self.cli.get_running_version() # type: ignore + self.api = APIClient(self.base_url, auth=auth, timeout=timeout) + assert self.user # trigger user profile fetch + cs_version = self.api.config.get_version() except Exception as e: - raise RuntimeError("Could not complete CryoSPARC authentication with given credentials") from e + raise RuntimeError( + f"Could not connect to CryoSPARC at {self.base_url} due to error:\n{e}\n" + "Please ensure your credentials are correct and that you are " + "connecting to a CryoSPARC version compatible with " + f"cryosparc-tools {tools_major_minor_version}. " + "Please see the documentation at https://tools.cryosparc.com for details."
+ ) from e if cs_version and VERSION_REGEX.match(cs_version): cs_major_minor_version = ".".join(cs_version[1:].split(".")[:2]) # e.g., v4.1.0 -> 4.1 - tools_major_minor_version = ".".join(__version__.split(".")[:2]) # e.g., 4.1.0 -> 4.1 tools_prerelease_url = "https://github.com/cryoem-uoft/cryosparc-tools/archive/refs/heads/develop.zip" if cs_major_minor_version != tools_major_minor_version: - warn( - f"CryoSPARC instance {host}:{base_port} with version {cs_version} " - f"may not be compatible with current cryosparc-tools version v{__version__}.\n\n" + warnings.warn( + f"CryoSPARC at {self.base_url} with version {cs_version} " + f"may not be compatible with current cryosparc-tools version {__version__}.\n\n" "To install a compatible version of cryosparc-tools:\n\n" f" pip install --force cryosparc-tools~={cs_major_minor_version}.0\n\n" "Or, if running a CryoSPARC pre-release or private beta:\n\n" @@ -192,6 +216,31 @@ def __init__( stacklevel=2, ) + @cached_property + def user(self) -> User: + """ + User account performing operations for this session. + """ + return self.api.users.me() + + @cached_property + def job_register(self) -> JobRegister: + """ + Information about jobs available on this instance. + """ + return self.api.job_register() + + def refresh(self): + """ + Reset cache and refresh instance details. + + Raises: + APIError: cannot be refreshed. + """ + clear_cached_property(self, "user") + clear_cached_property(self, "job_register") + assert self.user # ensure we can still fetch a user + def test_connection(self): """ Verify connection to CryoSPARC command services. 
@@ -199,28 +248,13 @@ def test_connection(self): Returns: bool: True if connection succeeded, False otherwise """ - if self.cli.test_connection(): # type: ignore - print(f"Connection succeeded to CryoSPARC command_core at {self.cli._url}") + if self.api.health() == "OK": + print(f"Connection succeeded to CryoSPARC API at {self.base_url}") + return True else: - print(f"Connection FAILED to CryoSPARC command_core at {self.cli._url}") + print(f"Connection FAILED to CryoSPARC API at {self.base_url}") return False - with make_request(self.vis, method="GET") as response: - if response.read(): - print(f"Connection succeeded to CryoSPARC command_vis at {self.vis._url}") - else: - print(f"Connection FAILED to CryoSPARC command_vis at {self.vis._url}") - return False - - with make_request(self.rtp, method="GET") as response: - if response.read(): - print(f"Connection succeeded to CryoSPARC command_rtp at {self.rtp._url}") - else: - print(f"Connection FAILED to CryoSPARC command_rtp at {self.rtp._url}") - return False - - return True - def get_lanes(self) -> List[SchedulerLane]: """ Get a list of available scheduler lanes. @@ -228,7 +262,7 @@ def get_lanes(self) -> List[SchedulerLane]: Returns: list[SchedulerLane]: Details about available lanes. """ - return self.cli.get_scheduler_lanes() # type: ignore + return self.api.resources.find_lanes() def get_targets(self, lane: Optional[str] = None) -> List[SchedulerTarget]: """ @@ -241,60 +275,63 @@ def get_targets(self, lane: Optional[str] = None) -> List[SchedulerTarget]: Returns: list[SchedulerTarget]: Details about available targets. """ - targets: List[SchedulerTarget] = self.cli.get_scheduler_targets() # type: ignore - if lane is not None: - targets = [t for t in targets if t["lane"] == lane] - return targets + return self.api.resources.find_targets(lane=lane) def get_job_sections(self) -> List[JobSection]: """ - Get a summary of job types available for this instance, organized by - category. 
+ (Deprecated) Get a summary of job types available for this instance, + organized by category. Returns: list[JobSection]: List of job section dictionaries. Job types are listed in the ``"contains"`` key in each dictionary. """ - return self.cli.get_job_sections() # type: ignore - - def get_job_specs(self) -> List[JobSpecSection]: - """ - Get a detailed summary of job and their specification available on - this instance, organized by category. - - Returns: - - list[JobSpecSection]: List of job section dictionaries. Job specs - are listed in the ``"contains"`` key in each dictionary - """ - return self.cli.get_config_var("job_types_available") # type: ignore - - def print_job_types(self, section: Union[str, Container[str], None] = None, *, show_legacy: bool = False): + warnings.warn("Use job_register property instead", DeprecationWarning, stacklevel=2) + job_types_by_category = { + category: [spec.type for spec in self.job_register.specs if spec.category == category] + for category in get_args(Category) + } + return [ + {"name": category, "title": category.replace("_", " ").title(), "description": "", "contains": job_types} + for category, job_types in job_types_by_category.items() + ] + + def print_job_types( + self, + category: Union[Category, Container[Category], None] = None, + *, + show_legacy: bool = False, + ): """ Print a table of job types and their titles, organized by category. Args: - section (str | list[str], optional): Only show jobs from the given - section or list of sections. Defaults to None. + category (Category | list[Category], optional): Only show jobs from + the given category or list of categories. Defaults to None. show_legacy (bool, optional): If True, also show legacy jobs. Defaults to False. 
""" - allowed_sections = {section} if isinstance(section, str) else section - sections = self.get_job_specs() - headings = ["Section", "Job", "Title"] + allowed_categories = {category} if isinstance(category, str) else category + register = self.job_register + headings = ["Category", "Job", "Title", "Stability"] rows = [] - for sec in sections: - if allowed_sections is not None and sec["name"] not in allowed_sections: + prev_category = None + for job_spec in register.specs: + if allowed_categories is not None and job_spec.category not in allowed_categories: + continue + if job_spec.hidden or job_spec.stability == "obsolete": + continue + if not show_legacy and job_spec.stability == "legacy": continue - sec_name = sec["name"] - for job in sec["contains"]: - if job["hidden"] or job["develop_only"] or not show_legacy and "(LEGACY)" in job["title"]: - continue - rows.append([sec_name, job["name"], job["title"]]) - sec_name = "" + + category = job_spec.category + display_category = "" if category == prev_category else category + rows.append([display_category, job_spec.type, job_spec.title, job_spec.stability]) + prev_category = category + print_table(headings, rows) - def find_project(self, project_uid: str) -> Project: + def find_project(self, project_uid: str) -> ProjectController: """ Get a project by its unique ID. @@ -302,13 +339,11 @@ def find_project(self, project_uid: str) -> Project: project_uid (str): Project unique ID, e.g., "P3" Returns: - Project: project instance + ProjectController: project accessor object """ - project = Project(self, project_uid) - project.refresh() - return project + return ProjectController(self, project_uid) - def find_workspace(self, project_uid: str, workspace_uid: str) -> Workspace: + def find_workspace(self, project_uid: str, workspace_uid: str) -> WorkspaceController: """ Get a workspace accessor instance for the workspace in the given project with the given UID. Fails with an error if workspace does not exist. 
@@ -318,12 +353,11 @@ def find_workspace(self, project_uid: str, workspace_uid: str) -> Workspace: workspace_uid (str): Workspace unique ID, e.g., "W1" Returns: - Workspace: accessor instance + WorkspaceController: workspace accessor object """ - workspace = Workspace(self, project_uid, workspace_uid) - return workspace.refresh() + return WorkspaceController(self, (project_uid, workspace_uid)) - def find_job(self, project_uid: str, job_uid: str) -> Job: + def find_job(self, project_uid: str, job_uid: str) -> JobController: """ Get a job by its unique project and job ID. @@ -332,13 +366,11 @@ def find_job(self, project_uid: str, job_uid: str) -> Job: job_uid (str): job unique ID, e.g., "J42" Returns: - Job: job instance + JobController: job accessor object """ - job = Job(self, project_uid, job_uid) - job.refresh() - return job + return JobController(self, (project_uid, job_uid)) - def find_external_job(self, project_uid: str, job_uid: str) -> ExternalJob: + def find_external_job(self, project_uid: str, job_uid: str) -> ExternalJobController: """ Get the External job accessor instance for an External job in this project with the given UID. Fails if the job does not exist or is not an @@ -352,15 +384,11 @@ def find_external_job(self, project_uid: str, job_uid: str) -> ExternalJob: TypeError: If job is not an external job Returns: - ExternalJob: accessor instance + ExternalJobController: external job accessor object """ - job = ExternalJob(self, project_uid, job_uid) - job.refresh() - if job.doc["job_type"] != "snowflake": - raise TypeError(f"Job {project_uid}-{job_uid} is not an external job") - return job + return ExternalJobController(self, (project_uid, job_uid)) - def create_workspace(self, project_uid: str, title: str, desc: Optional[str] = None) -> Workspace: + def create_workspace(self, project_uid: str, title: str, desc: Optional[str] = None) -> WorkspaceController: """ Create a new empty workspace in the given project. 
@@ -370,12 +398,13 @@ def create_workspace(self, project_uid: str, title: str, desc: Optional[str] = N desc (str, optional): Markdown text description. Defaults to None. Returns: - Workspace: created workspace instance + WorkspaceController: created workspace accessor object + + Raises: + APIError: Workspace cannot be created. """ - workspace_uid: str = self.cli.create_empty_workspace( # type: ignore - project_uid=project_uid, created_by_user_id=self.user_id, title=title, desc=desc - ) - return self.find_workspace(project_uid, workspace_uid) + workspace = self.api.workspaces.create(project_uid, title=title, description=desc) + return WorkspaceController(self, workspace) def create_job( self, @@ -384,12 +413,12 @@ def create_job( type: str, connections: Dict[str, Union[Tuple[str, str], List[Tuple[str, str]]]] = {}, params: Dict[str, Any] = {}, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> Job: + title: str = "", + desc: str = "", + ) -> JobController: """ - Create a new job with the given type. Use `CryoSPARC.get_job_sections`_ - to query available job types on the connected CryoSPARC instance. + Create a new job with the given type. Use :py:attr:`job_register` + to find available job types on the connected CryoSPARC instance. Args: project_uid (str): Project UID to create job in, e.g., "P3" @@ -400,11 +429,14 @@ def create_job( value is a (job uid, output name) tuple. Defaults to {} params (dict[str, any], optional): Specify parameter values. Defaults to {}. - title (str, optional): Job title. Defaults to None. - desc (str, optional): Job markdown description. Defaults to None. + title (str, optional): Job title. Defaults to "". + desc (str, optional): Job markdown description. Defaults to "". Returns: - Job: created job instance. Raises error if job cannot be created. + JobController: created job accessor object. + + Raises: + APIError: Job cannot be created. Examples: @@ -422,49 +454,43 @@ def create_job( ... 
connections={"particles": ("J20", "particles_selected")} ... params={"abinit_K": 3} ... ) - - .. _CryoSPARC.get_job_sections: - #cryosparc.tools.CryoSPARC.get_job_sections """ - conn = {k: (v if isinstance(v, list) else [v]) for k, v in connections.items()} - conn = {k: [".".join(i) for i in v] for k, v in conn.items()} - job_uid: str = self.cli.make_job( # type: ignore - job_type=type, - project_uid=project_uid, - workspace_uid=workspace_uid, - user_id=self.user_id, - title=title, - desc=desc, - params=params, - input_group_connects=conn, - ) - return self.find_job(project_uid, job_uid) + job = self.api.jobs.create(project_uid, workspace_uid, params=params, type=type, title=title, description=desc) + for input_name, connection in connections.items(): + connection = [connection] if isinstance(connection, tuple) else connection + for source_job_uid, source_output_name in connection: + job = self.api.jobs.connect( + job.project_uid, + job.uid, + input_name, + source_job_uid=source_job_uid, + source_output_name=source_output_name, + ) + return JobController(self, job) def create_external_job( self, project_uid: str, workspace_uid: str, - title: Optional[str] = None, - desc: Optional[str] = None, - ) -> ExternalJob: + title: str = "", + desc: str = "", + ) -> ExternalJobController: """ Add a new External job to this project to save generated outputs to. - Args: - project_uid (str): Project UID to create in, e.g., "P3" - workspace_uid (str): Workspace UID to create job in, e.g., "W1" - title (str, optional): Title for external job (recommended). - Defaults to None. - desc (str, optional): Markdown description for external job. - Defaults to None. + Args: + project_uid (str): Project UID to create in, e.g., "P3" + workspace_uid (str): Workspace UID to create job in, e.g., "W1" + title (str, optional): Title for external job (recommended). + Defaults to "". + desc (str, optional): Markdown description for external job. + Defaults to "". 
- Returns: - ExternalJob: created external job instance + Returns: + ExternalJobController: created external job accessor object """ - job_uid: str = self.vis.create_external_job( # type: ignore - project_uid=project_uid, workspace_uid=workspace_uid, user=self.user_id, title=title, desc=desc - ) - return self.find_external_job(project_uid, job_uid) + job = self.api.jobs.create(project_uid, workspace_uid, type="snowflake", title=title, description=desc) + return ExternalJobController(self, job) def save_external_result( self, @@ -475,8 +501,8 @@ def save_external_result( name: Optional[str] = None, slots: Optional[List[SlotSpec]] = None, passthrough: Optional[Tuple[str, str]] = None, - title: Optional[str] = None, - desc: Optional[str] = None, + title: str = "", + desc: str = "", ) -> str: """ Save the given result dataset to the project. Specify at least the @@ -539,16 +565,14 @@ def save_external_result( passthrough (tuple[str, str], optional): Indicates that this output inherits slots from the specified output. e.g., ``("J1", "particles")``. Defaults to None. - title (str, optional): Human-readable title for this output. - Defaults to None. + Defaults to "". desc (str, optional): Markdown description for this output. Defaults - to None. + to "". 
Raises: - CommandError: General CryoSPARC network access error such as + APIError: General CryoSPARC network access error such as timeout, URL or HTTP - InvalidSlotsError: slots argument is invalid Returns: str: UID of created job where this output was saved @@ -558,35 +582,47 @@ def save_external_result( prefixes = dataset.prefixes() if slots is None: slots = list(prefixes) - slot_names = {s if isinstance(s, str) else s["prefix"] for s in slots} - assert slot_names.intersection(prefixes) == slot_names, "Given dataset missing required slots" - - passthrough_str = ".".join(passthrough) if passthrough else None - try: - job_uid, output = self.vis.create_external_result( # type: ignore - project_uid=project_uid, - workspace_uid=workspace_uid, - type=type, + elif any(isinstance(s, dict) and "prefix" in s for s in slots): + warnings.warn("'prefix' slot key is deprecated. Use 'name' instead.", DeprecationWarning, stacklevel=2) + + # Normalize slots to OutputSlot or strings + output_slots = [s if isinstance(s, str) else as_output_slot(s) for s in slots] + required_slot_names = {s if isinstance(s, str) else s.name for s in output_slots} + missing_slot_names = required_slot_names.difference(prefixes) + if missing_slot_names: + raise ValueError(f"Given dataset missing required slots: {', '.join(missing_slot_names)}") + + if not name: + name = type + if not title: + title = name.replace("_", " ").title() + + # Find the most recent workspace or create a new one if the project is empty + if workspace_uid is None: + # TODO: limit find to one workspace + workspaces = self.api.workspaces.find(project_uid=[project_uid], order=-1) + workspace = workspaces[0] if workspaces else self.api.workspaces.create(project_uid, title=title) + workspace_uid = workspace.uid + + job = self.api.jobs.create_external_result( + project_uid, + workspace_uid, + ExternalOutputSpec( name=name, - slots=slots, - passthrough=passthrough_str, - user=self.user_id, - title=title, - desc=desc, - ) - except 
CommandError as err: - if err.code == 422 and err.data and "slots" in err.data: - raise InvalidSlotsError("save_external_result", err.data["slots"]) from err - raise - - job = self.find_external_job(project_uid, job_uid) + spec=OutputSpec(type=type, title=title, description=desc, slots=output_slots), + connection=OutputRef(job_uid=passthrough[0], output=passthrough[1]) if passthrough else None, + ), + ) + job = ExternalJobController(self, job) with job.run(): - job.save_output(output, dataset) - + job.save_output(name, dataset) return job.uid def list_files( - self, project_uid: str, prefix: Union[str, PurePosixPath] = "", recursive: bool = False + self, + project_uid: str, + prefix: Union[str, PurePosixPath] = "", + recursive: bool = False, ) -> List[str]: """ Get a list of files inside the project directory. @@ -601,12 +637,9 @@ def list_files( Returns: list[str]: List of file paths relative to the project directory. """ - return self.vis.list_project_files( # type: ignore - project_uid=project_uid, - prefix=str(prefix), - recursive=recursive, - ) + return self.api.projects.ls(project_uid, path=str(prefix), recursive=recursive) + @contextmanager def download(self, project_uid: str, path: Union[str, PurePosixPath]): """ Open a file in the given project for reading. Use to get files from a @@ -631,8 +664,9 @@ def download(self, project_uid: str, path: Union[str, PurePosixPath]): """ if not path: raise ValueError("Download path cannot be empty") - data = {"project_uid": project_uid, "path": str(path)} - return make_json_request(self.vis, "/get_project_file", data=data) + stream = self.api.projects.download_file(project_uid, path=str(path)) + iterator = BinaryIteratorIO(stream.stream()) + yield iterator def download_file( self, @@ -655,16 +689,8 @@ def download_file( Returns: Path | IO: resulting target path or file handle. 
""" - if isinstance(target, (str, PurePath)): - target = Path(target) - if target.is_dir(): - target /= PurePath(path).name - with bopen(target, "wb") as f: - with self.download(project_uid, path) as response: - data = response.read(ONE_MIB) - while data: - f.write(data) - data = response.read(ONE_MIB) + stream = self.api.projects.download_file(project_uid, path=str(path)) + stream.save(target) return target def download_dataset(self, project_uid: str, path: Union[str, PurePosixPath]): @@ -679,26 +705,16 @@ def download_dataset(self, project_uid: str, path: Union[str, PurePosixPath]): Returns: Dataset: Loaded dataset instance """ - with self.download(project_uid, path) as response: - size = response.headers.get("Content-Length") - mime = response.headers.get("Content-Type") - if mime == "application/x-cryosparc-dataset": - # Stream format; can load directly without seek - return Dataset.load(response) - - # Numpy format, cannot load directly because requires seekable - if size and int(size) < ONE_MIB: - # Smaller than 1MiB, just read all into memory and load - return Dataset.load(BytesIO(response.read())) - - # Read into temporary file in 1MiB chunks. Load from that temporary file - with tempfile.TemporaryFile("w+b", suffix=".cs") as f: - data = response.read(ONE_MIB) - while data: - f.write(data) - data = response.read(ONE_MIB) - f.seek(0) - return Dataset.load(f) + stream = self.api.projects.download_file(project_uid, path=str(path)) + if stream.media_type == "application/x-cryosparc-dataset": + # Stream format; can load directly without seek + return Dataset.from_iterator(stream.stream()) + + # Numpy format, cannot load directly because requires seekable. 
Load from a temporary file instead + with tempfile.TemporaryFile("w+b", suffix=".cs") as f: + stream.save(f) + f.seek(0) + return Dataset.load(f) def download_mrc(self, project_uid: str, path: Union[str, PurePosixPath]): """ @@ -712,16 +728,13 @@ def download_mrc(self, project_uid: str, path: Union[str, PurePosixPath]): Returns: tuple[Header, NDArray]: MRC file header and data as a numpy array """ - with self.download(project_uid, path) as response: - with tempfile.TemporaryFile("w+b", suffix=".cs") as f: - data = response.read(ONE_MIB) - while data: - f.write(data) - data = response.read(ONE_MIB) - f.seek(0) - return mrc.read(f) # FIXME: Optimize file reading - - def list_assets(self, project_uid: str, job_uid: str) -> List[AssetDetails]: + stream = self.api.projects.download_file(project_uid, path=str(path)) + with tempfile.TemporaryFile("w+b", suffix=".mrc") as f: + stream.save(f) + f.seek(0) + return mrc.read(f) # FIXME: Optimize file reading + + def list_assets(self, project_uid: str, job_uid: str) -> List[GridFSFile]: """ Get a list of files available in the database for given job. Returns a list with details about the assets. Each entry is a dict with a ``_id`` @@ -733,9 +746,9 @@ def list_assets(self, project_uid: str, job_uid: str) -> List[AssetDetails]: job_uid (str): job unique ID, e.g., "J42" Returns: - list[AssetDetails]: Asset details + list[GridFSFile]: Asset details """ - return self.vis.list_job_files(project_uid=project_uid, job_uid=job_uid) # type: ignore + return self.api.assets.find(project_uid=project_uid, job_uid=job_uid) def download_asset(self, fileid: str, target: Union[str, PurePath, IO[bytes]]): """ @@ -743,34 +756,21 @@ def download_asset(self, fileid: str, target: Union[str, PurePath, IO[bytes]]): Args: fileid (str): GridFS file object ID - target (str | Path | IO): Local file path, directory path or - writeable file handle to write response data. + target (str | Path | IO): Local file path or writeable file handle + to write response data.
Returns: - Path | IO: resulting target path or file handle. + str | Path | IO: resulting target path or file handle. """ - with make_json_request(self.vis, url="/get_job_file", data={"fileid": fileid}) as response: - if isinstance(target, (str, PurePath)): - target = Path(target) - if target.is_dir(): - # Try to get download filename and content type from - # headers. If cannot be determined, defaults to "file.dat" - content_type: str = response.headers.get_content_type() - attachment_filename: Optional[str] = response.headers.get_filename() - target /= attachment_filename or f"file.{ASSET_EXTENSIONS.get(content_type, 'dat')}" # type: ignore - with bopen(target, "wb") as f: - data = response.read(ONE_MIB) - while data: - f.write(data) - data = response.read(ONE_MIB) - - return target + stream = self.api.assets.download(fileid) + stream.save(target) + return target def upload( self, project_uid: str, target_path: Union[str, PurePosixPath], - source: Union[str, bytes, PurePath, IO], + source: Union[str, bytes, PurePath, IO, Stream], *, overwrite: bool = False, ): @@ -782,21 +782,18 @@ def upload( project_uid (str): Project unique ID, e.g., "P3" target_path (str | Path): Name or path of file to write in project directory. - source (str | bytes | Path | IO): Local path or file handle to - upload. May also specified as raw bytes. + source (str | bytes | Path | IO | Stream): Local path or file handle + to upload. May also be specified as raw bytes. overwrite (bool, optional): If True, overwrite existing files. Defaults to False.
""" - url = f"/projects/{project_uid}/files" - query: dict = {"path": target_path} - if overwrite: - query["overwrite"] = 1 - with open(source, "rb") if isinstance(source, (str, PurePath)) else noopcontext(source) as f: - with make_request(self.vis, url=url, query=query, data=f) as res: - assert res.status >= 200 and res.status < 300, ( - f"Could not upload project {project_uid} file {target_path}.\n" - f"Response from CryoSPARC ({res.status}): {res.read().decode()}" - ) + if isinstance(source, bytes): + source = BytesIO(source) + if isinstance(source, TextIOBase): # e.g., open(p, "r") or StringIO() + source = Stream.from_iterator(s.encode() for s in source) + if not isinstance(source, Stream): + source = Stream.load(source) + self.api.projects.upload_file(project_uid, source, path=str(target_path), overwrite=overwrite) def upload_dataset( self, @@ -821,6 +818,9 @@ def upload_dataset( overwrite (bool, optional): If True, overwrite existing files. Defaults to False. """ + if format == CSDAT_FORMAT: + return self.upload(project_uid, target_path, Stream.from_iterator(dset.stream()), overwrite=overwrite) + if len(dset) < 100: # Probably small enough to upload from memory f = BytesIO() @@ -881,8 +881,8 @@ def mkdir( existing directories. Still raises if the target path is not a directory. Defaults to False. """ - self.vis.project_mkdir( # type: ignore - project_uid=project_uid, + self.api.projects.mkdir( + project_uid, path=str(target_path), parents=parents, exist_ok=exist_ok, @@ -902,11 +902,7 @@ def cp(self, project_uid: str, source_path: Union[str, PurePosixPath], target_pa directory to copy into. If not specified, uses the same file name as the source. Defaults to "". """ - self.vis.project_cp( # type: ignore - project_uid=project_uid, - source_path=str(source_path), - target_path=str(target_path), - ) + self.api.projects.cp(project_uid, source=str(source_path), path=str(target_path)) def symlink( self, @@ -927,11 +923,7 @@ def symlink( directory. 
If not specified, creates link with the same file name as the source. Defaults to "". """ - self.vis.project_symlink( # type: ignore - project_uid=project_uid, - source_path=str(source_path), - target_path=str(target_path), - ) + self.api.projects.symlink(project_uid, source=str(source_path), path=str(target_path)) def get_import_signatures(abs_paths: Union[str, Iterable[str], "NDArray"]): diff --git a/cryosparc/util.py b/cryosparc/util.py index cf46df91..9a5eaa47 100644 --- a/cryosparc/util.py +++ b/cryosparc/util.py @@ -5,9 +5,7 @@ TYPE_CHECKING, Any, Callable, - ContextManager, Dict, - Generator, Generic, Iterator, List, @@ -26,7 +24,7 @@ if TYPE_CHECKING: from numpy.typing import NDArray # type: ignore -from .dtype import Shape +from .spec import Shape OpenTextMode = Literal["r", "w", "x", "a", "r+", "w+", "x+", "a+"] """ @@ -227,24 +225,6 @@ def bopen(file: Union[str, PurePath, IO[bytes]], mode: OpenBinaryMode = "rb"): yield file -@overload -def noopcontext() -> ContextManager[None]: ... -@overload -def noopcontext(x: T) -> ContextManager[T]: ... -@contextmanager -def noopcontext(x: Optional[T] = None) -> Generator[Optional[T], None, None]: - """ - Context manager that yields the given argument without modification. - - Args: - x (T, optional): Anything. Defaults to None. - - Yields: - T: the given argument - """ - yield x - - def padarray(arr: "NDArray", dim: Optional[int] = None, val: n.number = n.float32(0)): """ Pad the given 2D or 3D array so that the x and y dimensions are equal to the @@ -310,10 +290,7 @@ def default_rng(seed=None) -> "n.random.Generator": Returns: numpy.random.Generator: Random number generator """ - try: - return n.random.default_rng(seed) - except AttributeError: - return n.random.RandomState(seed) # type: ignore + return n.random.default_rng(seed) def random_integers( @@ -337,11 +314,7 @@ def random_integers( Returns: NDArray: Numpy array of randomly-generated integers. 
""" - try: - f = rng.integers - except AttributeError: - f = rng.randint # type: ignore - return f(low=low, high=high, size=size, dtype=dtype) # type: ignore + return rng.integers(low=low, high=high, size=size, dtype=dtype) # type: ignore def print_table(headings: List[str], rows: List[List[str]]): @@ -355,3 +328,12 @@ def print_table(headings: List[str], rows: List[List[str]]): print("=" * len(heading)) for row in rows: print(" | ".join(f"{v:{p}s}" for v, p in zip(row, pad))) + + +def clear_cached_property(obj: object, name: str): + """ + Clear object's @cached_property without accessing it when it's never been cached. + Object must have __dict__ key. + """ + if name in obj.__dict__: + delattr(obj, name) diff --git a/docs/examples/3dflex-custom-latent-trajectory.ipynb b/docs/examples/3dflex-custom-latent-trajectory.ipynb index 03458c97..30efa4c6 100644 --- a/docs/examples/3dflex-custom-latent-trajectory.ipynb +++ b/docs/examples/3dflex-custom-latent-trajectory.ipynb @@ -608,7 +608,7 @@ "# so we need to divide the components_mode fields by two to get the total number of components\n", "num_components = int(len([x for x in particles.fields() if \"components_mode\" in x]) / 2)\n", "\n", - "slot_spec = [{\"dtype\": \"components\", \"prefix\": f\"components_mode_{k}\", \"required\": True} for k in range(num_components)]\n", + "slot_spec = [{\"dtype\": \"components\", \"name\": f\"components_mode_{k}\"} for k in range(num_components)]\n", "job = project.create_external_job(\"W5\", \"Custom Latents\")\n", "job.connect(\"particles\", \"J243\", \"particles\", slots=slot_spec)" ] diff --git a/docs/examples/cryolo.ipynb b/docs/examples/cryolo.ipynb index 0b565748..af135389 100644 --- a/docs/examples/cryolo.ipynb +++ b/docs/examples/cryolo.ipynb @@ -32,16 +32,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Connection succeeded to CryoSPARC command_core at http://cryoem0.sbi:40002\n", - "Connection succeeded to CryoSPARC command_vis at 
http://cryoem0.sbi:40003\n", - "Connection succeeded to CryoSPARC command_rtp at http://cryoem0.sbi:40005\n" + "Connection succeeded to CryoSPARC API at http://cryoem0.sbi:61002\n" ] } ], "source": [ "from cryosparc.tools import CryoSPARC\n", "\n", - "cs = CryoSPARC(host=\"cryoem0.sbi\", base_port=40000)\n", + "cs = CryoSPARC(host=\"cryoem0.sbi\", base_port=61000)\n", "assert cs.test_connection()\n", "\n", "project = cs.find_project(\"P251\")" @@ -175,12 +173,12 @@ "\n", "for mic in all_micrographs.rows():\n", " source = mic[\"micrograph_blob/path\"]\n", - " target = job.uid + \"/full_data/\" + source.split(\"/\")[-1]\n", + " target = job.uid + \"/full_data/\"\n", " project.symlink(source, target)\n", "\n", "for mic in train_micrographs.rows():\n", " source = mic[\"micrograph_blob/path\"]\n", - " target = job.uid + \"/train_image/\" + source.split(\"/\")[-1]\n", + " target = job.uid + \"/train_image/\"\n", " project.symlink(source, target)" ] }, @@ -293,7 +291,11 @@ "\n", "cryosparc-tools provides a `job.subprocess` function to run arbitrary processes, including `cryolo_*.py` scripts installed in the active conda environment.\n", "\n", - "Use `job.subprocess` to generate a crYOLO configuration file with the `cryolo_gui.py config` command. Specify a box size of 130 for this dataset." + "Use `job.subprocess` to generate a crYOLO configuration file with the `cryolo_gui.py config` command. Specify a box size of 130 for this dataset.\n", + "\n", + "```{note}\n", + "When connecting to a remote CryoSPARC instance, note that `job.subprocess` processes will run on the local machine, not remotely.\n", + "```" ] }, { @@ -368,7 +370,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This creates a `cryolo_model.h5` trained model file in the job directory.\n", + "Open the External job's \"Events\" tab from CryoSPARC's web interface to view crYOLO's output. 
When the process completes, crYOLO creates a `cryolo_model.h5` trained model file in the job directory.\n", "\n", "## Picking\n", "\n", diff --git a/docs/examples/custom-workflow.ipynb b/docs/examples/custom-workflow.ipynb index 1ef8a5e2..deb87aac 100644 --- a/docs/examples/custom-workflow.ipynb +++ b/docs/examples/custom-workflow.ipynb @@ -42,16 +42,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Connection succeeded to CryoSPARC command_core at http://cryoem0.sbi:40002\n", - "Connection succeeded to CryoSPARC command_vis at http://cryoem0.sbi:40003\n", - "Connection succeeded to CryoSPARC command_rtp at http://cryoem0.sbi:40005\n" + "Connection succeeded to CryoSPARC API at http://cryoem0.sbi:61002\n" ] } ], "source": [ "from cryosparc.tools import CryoSPARC\n", "\n", - "cs = CryoSPARC(host=\"cryoem0.sbi\", base_port=40000)\n", + "cs = CryoSPARC(host=\"cryoem0.sbi\", base_port=61000)\n", "assert cs.test_connection()\n", "\n", "project = cs.find_project(\"P251\")\n", @@ -64,16 +62,123 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Import the movies with an Import Movies job. Note that you may use the `CryoSPARC.get_job_sections` method to inspect available job type keys to use with `Workspace.create_job`." + "Import the movies with an Import Movies job. Note that you may use the `CryoSPARC.print_job_types` method to inspect available job type keys to use with `Workspace.create_job`." 
] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Category | Job | Title | Stability\n", + "====================================================================================================\n", + "import | import_movies | Import Movies | stable \n", + " | import_micrographs | Import Micrographs | stable \n", + " | import_particles | Import Particle Stack | stable \n", + " | import_volumes | Import 3D Volumes | stable \n", + " | import_templates | Import Templates | stable \n", + " | import_result_group | Import Result Group | stable \n", + " | import_beam_shift | Import Beam Shift | stable \n", + "motion_correction | patch_motion_correction_multi | Patch Motion Correction | stable \n", + " | rigid_motion_correction_multi | Full-frame Motion Correction | stable \n", + " | rigid_motion_correction | Full-frame Motion Correction | develop \n", + " | local_motion_correction | Local Motion Correction | stable \n", + " | local_motion_correction_multi | Local Motion Correction | stable \n", + " | motion_correction_motioncor2 | MotionCor2 | beta \n", + " | reference_motion_correction | Reference Based Motion Correction | beta \n", + " | local_applytraj | Apply Trajectories | develop \n", + " | patch_to_local | Patch Motion to Local Motion | develop \n", + " | recenter_trajectories | Recenter Trajectories | develop \n", + "ctf_estimation | patch_ctf_estimation_multi | Patch CTF Estimation | stable \n", + " | patch_ctf_extract | Patch CTF Extraction | stable \n", + " | ctf_estimation | CTF Estimation (CTFFIND4) | stable \n", + "exposure_curation | denoise_train | Micrograph Denoiser | beta \n", + " | curate_exposures_v2 | Manually Curate Exposures | stable \n", + "particle_picking | manual_picker_v2 | Manual Picker | stable \n", + " | blob_picker_gpu | Blob Picker | stable \n", + " | template_picker_gpu | Template Picker | stable \n", + " | filament_tracer_gpu 
| Filament Tracer | stable \n", + " | auto_blob_picker_gpu | Blob Picker Tuner | stable \n", + " | inspect_picks_v2 | Inspect Particle Picks | stable \n", + " | create_templates | Create Templates | stable \n", + "extraction | extract_micrographs_multi | Extract From Micrographs (GPU) | stable \n", + " | extract_micrographs_cpu_parallel | Extract From Micrographs (CPU) | stable \n", + " | downsample_particles | Downsample Particles | stable \n", + " | restack_particles | Restack Particles | stable \n", + "deep_picker | topaz_train | Topaz Train | stable \n", + " | topaz_cross_validation | Topaz Cross Validation (BETA) | beta \n", + " | topaz_extract | Topaz Extract | stable \n", + " | topaz_denoise | Topaz Denoise | stable \n", + "particle_curation | class_2D_new | 2D Classification (NEW) | stable \n", + " | select_2D | Select 2D Classes | stable \n", + " | reference_select_2D | Reference Based Auto Select 2D | beta \n", + " | reconstruct_2D | Reconstruct 2D Classes | stable \n", + " | rebalance_classes_2D | Rebalance 2D Classes | stable \n", + " | class_probability_filter | Class Probability Filter | stable \n", + " | rebalance_3D | Rebalance Orientations | stable \n", + "reconstruction | homo_abinit | Ab-Initio Reconstruction | stable \n", + "refinement | homo_refine_new | Homogeneous Refinement | stable \n", + " | hetero_refine | Heterogeneous Refinement | stable \n", + " | nonuniform_refine_new | Non-uniform Refinement | stable \n", + " | homo_reconstruct | Homogeneous Reconstruction Only | stable \n", + " | hetero_reconstruct_new | Heterogenous Reconstruction Only | stable \n", + "ctf_refinement | ctf_refine_global | Global CTF Refinement | stable \n", + " | ctf_refine_local | Local CTF Refinement | stable \n", + " | exposure_groups | Exposure Group Utilities | stable \n", + "variability | var_3D | 3D Variability | stable \n", + " | var_3D_disp | 3D Variability Display | stable \n", + " | class_3D | 3D Classification | stable \n", + " | regroup_3D_new | 
Regroup 3D Classes | stable \n", + " | reference_select_3D | Reference Based Auto Select 3D | beta \n", + " | reorder_3D | Reorder 3D Classes | beta \n", + "flexibility | flex_prep | 3D Flex Data Prep | beta \n", + " | flex_meshprep | 3D Flex Mesh Prep | beta \n", + " | flex_train | 3D Flex Training | beta \n", + " | flex_highres | 3D Flex Reconstruction | beta \n", + " | flex_generate | 3D Flex Generator | beta \n", + "postprocessing | sharpen | Sharpening Tools | stable \n", + " | deepemhancer | DeepEMhancer | stable \n", + " | validation | Validation (FSC) | stable \n", + " | local_resolution | Local Resolution Estimation | stable \n", + " | local_filter | Local Filtering | stable \n", + " | reslog | ResLog Analysis | stable \n", + "local_refinement | new_local_refine | Local Refinement | stable \n", + " | particle_subtract | Particle Subtraction | stable \n", + "helix | helix_refine | Helical Refinement | stable \n", + " | helix_search | Symmetry Search Utility | stable \n", + " | helix_initmodel | Helical Initial Model Utility | develop \n", + " | helix_symmetrize | Apply Helical Symmetry | develop \n", + " | helix_average_power_spectra | Average Power Spectra | stable \n", + "utilities | exposure_sets | Exposure Sets Tool | stable \n", + " | exposure_tools | Exposure Tools | stable \n", + " | generate_thumbs | Generate Micrograph Thumbnails | stable \n", + " | cache_particles | Cache Particles on SSD | stable \n", + " | check_corrupt_particles | Check For Corrupt Particles | stable \n", + " | check_corrupt_micrographs | Check For Corrupt Micrographs | stable \n", + " | particle_sets | Particle Sets Tool | stable \n", + " | reassign_particles_mics | Reassign Particles to Micrographs | stable \n", + " | remove_duplicate_particles | Remove Duplicate Particles | stable \n", + " | sym_expand | Symmetry Expansion | stable \n", + " | volume_tools | Volume Tools | stable \n", + " | volume_alignment_tools | Volume Alignment Tools | stable \n", + " | align_3D_new | 
Align 3D Maps | stable \n", + " | split_volumes_group | Split Volumes Group | stable \n", + " | orientation_diagnostics | Orientation Diagnostics | stable \n", + "simulations | simulator_gpu | Simulate Data | stable \n", + "instance_testing | instance_launch_test | Test Job Launch | stable \n", + " | worker_ssd_test | Test Worker SSD | stable \n", + " | worker_gpu_test | Test Worker GPUs | stable \n", + " | worker_benchmark | Benchmark | stable \n", + "workflows | extensive_workflow_bench | Extensive Validation | stable \n" + ] + } + ], "source": [ - "job_sections = cs.get_job_sections() # [{'contains': ['import_movies', 'import_micrographs', ...] ... }, ...]\n", + "job_sections = cs.print_job_types()\n", "import_movies_job = workspace.create_job(\n", " \"import_movies\",\n", " params={\n", @@ -104,25 +209,32 @@ "name": "stdout", "output_type": "stream", "text": [ - "Param | Title | Type | Default\n", - "=============================================================================\n", - "accel_kv | Accelerating Voltage (kV) | number | None \n", - "blob_paths | Movies data path | path | None \n", - "cs_mm | Spherical Aberration (mm) | number | None \n", - "defect_path | Defect file path | path | None \n", - "eer_num_fractions | EER Number of Fractions | number | 40 \n", - "eer_upsamp_factor | EER Upsampling Factor | number | 2 \n", - "gainref_flip_x | Flip gain ref & defect file in X? | boolean | False \n", - "gainref_flip_y | Flip gain ref & defect file in Y? | boolean | False \n", - "gainref_path | Gain reference path | path | None \n", - "gainref_rotate_num | Rotate gain ref? 
| number | 0 \n", - "negative_stain_data | Negative Stain Data | boolean | False \n", - "output_constant_ctf | Output Constant CTF | boolean | False \n", - "override_exp_group_id | Override Exposure Group ID | number | None \n", - "phase_plate_data | Phase Plate Data | boolean | False \n", - "psize_A | Raw pixel size (A) | number | None \n", - "skip_header_check | Skip Header Check | boolean | False \n", - "total_dose_e_per_A2 | Total exposure dose (e/A^2) | number | None \n" + "Param | Title | Type | Default\n", + "=========================================================================================================\n", + "blob_paths | Movies data path | string | None \n", + "gainref_path | Gain reference path | string | None \n", + "defect_path | Defect file path | string | None \n", + "gainref_flip_x | Flip gain ref & defect file in X? | boolean | False \n", + "gainref_flip_y | Flip gain ref & defect file in Y? | boolean | False \n", + "gainref_rotate_num | Rotate gain ref? | integer | 0 \n", + "psize_A | Pixel size (A) | number | None \n", + "accel_kv | Accelerating Voltage (kV) | number | None \n", + "cs_mm | Spherical Aberration (mm) | number | None \n", + "total_dose_e_per_A2 | Total exposure dose (e/A^2) | number | None \n", + "negative_stain_data | Negative Stain Data | boolean | False \n", + "phase_plate_data | Phase Plate Data | boolean | False \n", + "override_exp_group_id | Override Exposure Group ID | integer | None \n", + "skip_header_check | Skip Header Check | boolean | True \n", + "output_constant_ctf | Output Constant CTF | boolean | False \n", + "eer_num_fractions | EER Number of Fractions | integer | 40 \n", + "eer_upsamp_factor | EER Upsampling Factor | number | 2 \n", + "parse_xml_files | Import Beam Shift Values from XML Files | boolean | False \n", + "xml_paths | EPU XML metadata path | string | None \n", + "mov_cut_prefix_xml | Length of input filename prefix to cut for XML correspondence | integer | None \n", + "mov_cut_suffix_xml | 
Length of input filename suffix to cut for XML correspondence | integer | None \n", + "xml_cut_prefix_xml | Length of XML filename prefix to cut for input correspondence | integer | None \n", + "xml_cut_suffix_xml | Length of XML filename suffix to cut for input correspondence | integer | 4 \n", + "compute_num_cpus | Number of CPUs to parallelize during header check | integer | 4 \n" ] } ], @@ -155,7 +267,7 @@ } ], "source": [ - "import_movies_job.set_param(\"skip_header_check\", True)" + "import_movies_job.set_param(\"skip_header_check\", False)" ] }, { @@ -489,7 +601,7 @@ ")\n", "\n", "classify_blob_picks_job = workspace.create_job(\n", - " \"class_2D\",\n", + " \"class_2D_new\",\n", " connections={\"particles\": (extract_blob_picks_job.uid, \"particles\")},\n", " params={\"class2D_K\": 10},\n", ")\n", diff --git a/docs/examples/hi-res-2d-classes.ipynb b/docs/examples/hi-res-2d-classes.ipynb index 85289200..c0509791 100644 --- a/docs/examples/hi-res-2d-classes.ipynb +++ b/docs/examples/hi-res-2d-classes.ipynb @@ -13,23 +13,21 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Connection succeeded to CryoSPARC command_core at http://cryoem0.sbi:40002\n", - "Connection succeeded to CryoSPARC command_vis at http://cryoem0.sbi:40003\n", - "Connection succeeded to CryoSPARC command_rtp at http://cryoem0.sbi:40005\n" + "Connection succeeded to CryoSPARC API at http://cryoem0.sbi:61002\n" ] } ], "source": [ "from cryosparc.tools import CryoSPARC\n", "\n", - "cs = CryoSPARC(host=\"cryoem0.sbi\", base_port=40000)\n", + "cs = CryoSPARC(host=\"cryoem0.sbi\", base_port=61000)\n", "assert cs.test_connection()" ] }, @@ -55,7 +53,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This job has 10 selected templates. Each template is stored in a MRC file created by the 2D Classification job. The `templates_selected` contains the location of these. 
Load all the unique paths (organized by path in a Python dictionary).\n" + "This job has 24 selected templates. Each template is stored in a MRC file created by the 2D Classification job. The `templates_selected` contains the location of these. Load all the unique paths (organized by path in a Python dictionary).\n" ] }, { @@ -100,17 +98,17 @@ "\n", "N = templates_selected[\"blob/shape\"][0][0]\n", "scale = 100 / templates_selected[\"blob/psize_A\"][0] # 100 Å in pixels\n", - "fig, axes = plt.subplots(3, 5, figsize=(5, 3), dpi=400)\n", + "fig, axes = plt.subplots(3, 8, figsize=(8, 3), dpi=400)\n", "plt.margins(x=0, y=0)\n", "\n", "for i, template in enumerate(templates_selected.rows()):\n", " path = template[\"blob/path\"]\n", " index = template[\"blob/idx\"]\n", " blob = all_templates_blobs[path][index]\n", - " ax = axes[i // 5, i % 5]\n", + " ax = axes[i // 8, i % 8]\n", " ax.axis(\"off\")\n", " ax.imshow(blob, cmap=\"gray\", origin=\"lower\")\n", - " if i % 5 > 0:\n", + " if i % 8 > 0:\n", " continue\n", "\n", " # Plot scale bar\n", diff --git a/docs/examples/recenter-particles.ipynb b/docs/examples/recenter-particles.ipynb index c719ed4a..0897d406 100644 --- a/docs/examples/recenter-particles.ipynb +++ b/docs/examples/recenter-particles.ipynb @@ -28,15 +28,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Connection succeeded to CryoSPARC command_core at http://cryoem5:40002\n", - "Connection succeeded to CryoSPARC command_vis at http://cryoem5:40003\n" + "Connection succeeded to CryoSPARC API at http://cryoem0.sbi:61002\n" ] } ], "source": [ "from cryosparc.tools import CryoSPARC\n", "\n", - "cs = CryoSPARC(host=\"cryoem5\", base_port=40000)\n", + "cs = CryoSPARC(host=\"cryoem0.sbi\", base_port=61000)\n", "assert cs.test_connection()" ] }, diff --git a/docs/examples/xml-exposure-groups.ipynb b/docs/examples/xml-exposure-groups.ipynb index 7f8470f0..42c622ee 100644 --- a/docs/examples/xml-exposure-groups.ipynb +++ 
b/docs/examples/xml-exposure-groups.ipynb @@ -20,15 +20,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Connection succeeded to CryoSPARC command_core at http://cryoem5:40002\n", - "Connection succeeded to CryoSPARC command_vis at http://cryoem5:40003\n" + "Connection succeeded to CryoSPARC API at http://cryoem0.sbi:61002\n" ] } ], "source": [ "from cryosparc.tools import CryoSPARC\n", "\n", - "cs = CryoSPARC(host=\"cryoem5\", base_port=40000)\n", + "cs = CryoSPARC(host=\"cryoem0.sbi\", base_port=61000)\n", "assert cs.test_connection()\n", "\n", "project = cs.find_project(\"P251\")" @@ -137,7 +136,7 @@ "\n", "from cryosparc.tools import get_exposure_format, get_import_signatures\n", "\n", - "job.start()\n", + "output_datasets = {}\n", "\n", "for i, node in enumerate(doc.getElementsByTagName(\"imageSet\")[:2]):\n", " directory = get_child_value(node, \"directory\")\n", @@ -177,9 +176,11 @@ " dset[\"gain_ref_blob/path\"] = str(gain_path)\n", " dset[\"gain_ref_blob/shape\"] = (image_height, image_width)\n", "\n", - " job.save_output(f\"images_{i}\", dset)\n", + " output_datasets[f\"images_{i}\"] = dset\n", "\n", - "job.stop()" + "with job.run():\n", + " for output_name, dset in output_datasets.items():\n", + " job.save_output(output_name, dset)" ] }, { diff --git a/docs/guides/jobs.ipynb b/docs/guides/jobs.ipynb index 4641f78e..7294cc4e 100644 --- a/docs/guides/jobs.ipynb +++ b/docs/guides/jobs.ipynb @@ -30,16 +30,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Connection succeeded to CryoSPARC command_core at http://cryoem0.sbi:40002\n", - "Connection succeeded to CryoSPARC command_vis at http://cryoem0.sbi:40003\n", - "Connection succeeded to CryoSPARC command_rtp at http://cryoem0.sbi:40005\n" + "Connection succeeded to CryoSPARC API at http://cryoem0.sbi:61002\n" ] } ], "source": [ "from cryosparc.tools import CryoSPARC\n", "\n", - "cs = CryoSPARC(host=\"cryoem0.sbi\", base_port=40000)\n", + "cs = CryoSPARC(host=\"cryoem0.sbi\", 
base_port=61000)\n", "assert cs.test_connection()" ] }, @@ -66,22 +64,22 @@ "name": "stdout", "output_type": "stream", "text": [ - "Section | Job | Title \n", - "=================================================================================\n", - "extraction | extract_micrographs_multi | Extract From Micrographs (GPU) \n", - " | extract_micrographs_cpu_parallel | Extract From Micrographs (CPU) \n", - " | downsample_particles | Downsample Particles \n", - " | restack_particles | Restack Particles \n", - "refinement | homo_refine_new | Homogeneous Refinement \n", - " | hetero_refine | Heterogeneous Refinement \n", - " | nonuniform_refine_new | Non-uniform Refinement \n", - " | homo_reconstruct | Homogeneous Reconstruction Only \n", - " | hetero_reconstruct | Heterogeneous Reconstruction Only\n" + "Category | Job | Title | Stability\n", + "============================================================================================\n", + "extraction | extract_micrographs_multi | Extract From Micrographs (GPU) | stable \n", + " | extract_micrographs_cpu_parallel | Extract From Micrographs (CPU) | stable \n", + " | downsample_particles | Downsample Particles | stable \n", + " | restack_particles | Restack Particles | stable \n", + "refinement | homo_refine_new | Homogeneous Refinement | stable \n", + " | hetero_refine | Heterogeneous Refinement | stable \n", + " | nonuniform_refine_new | Non-uniform Refinement | stable \n", + " | homo_reconstruct | Homogeneous Reconstruction Only | stable \n", + " | hetero_reconstruct_new | Heterogenous Reconstruction Only | stable \n" ] } ], "source": [ - "cs.print_job_types(section=[\"extraction\", \"refinement\"])" + "cs.print_job_types(category=[\"extraction\", \"refinement\"])" ] }, { @@ -182,16 +180,16 @@ "text": [ "Param | Title | Type | Default\n", "==================================================================================\n", - "bin_size_pix | Fourier-crop to box size (pix) | number | None \n", - "bin_size_pix_small | 
Second (small) F-crop box size (pix) | number | None \n", - "box_size_pix | Extraction box size (pix) | number | 256 \n", - "compute_num_cores | Number of CPU cores | number | 4 \n", - "flip_x | Flip mic. in x before extract? | boolean | False \n", - "flip_y | Flip mic. in y before extract? | boolean | False \n", - "force_reextract_CTF | Force re-extract CTFs from micrographs | boolean | False \n", - "num_extract | Number of mics to extract | number | None \n", + "compute_num_cores | Number of CPU cores | integer | 4 \n", + "box_size_pix | Extraction box size (pix) | integer | 256 \n", + "bin_size_pix | Fourier-crop to box size (pix) | integer | None \n", + "bin_size_pix_small | Second (small) F-crop box size (pix) | integer | None \n", "output_f16 | Save results in 16-bit floating point | boolean | False \n", + "force_reextract_CTF | Force re-extract CTFs from micrographs | boolean | False \n", "recenter_using_shifts | Recenter using aligned shifts | boolean | True \n", + "num_extract | Number of mics to extract | integer | None \n", + "flip_x | Flip mic. in x before extract? | boolean | False \n", + "flip_y | Flip mic. in y before extract? 
| boolean | False \n", "scale_const_override | Scale constant (override) | number | None \n" ] } @@ -334,23 +332,24 @@ "name": "stdout", "output_type": "stream", "text": [ - "Output | Title | Type | Result Slots | Result Types \n", - "==============================================================================================\n", - "micrographs | Micrographs accepted | exposure | micrograph_blob | micrograph_blob\n", - " | | | ctf | ctf \n", - " | | | mscope_params | mscope_params \n", - " | | | background_blob | stat_blob \n", - " | | | micrograph_thumbnail_blob_1x | thumbnail_blob \n", - " | | | micrograph_thumbnail_blob_2x | thumbnail_blob \n", - " | | | ctf_stats | ctf_stats \n", - " | | | micrograph_blob_non_dw | micrograph_blob\n", - " | | | rigid_motion | motion \n", - " | | | spline_motion | motion \n", - " | | | movie_blob | movie_blob \n", - " | | | gain_ref_blob | gain_ref_blob \n", - "particles | Particles accepted | particle | location | location \n", - " | | | pick_stats | pick_stats \n", - " | | | ctf | ctf \n" + "Output | Title | Type | Result Slots | Result Types | Passthrough?\n", + "=============================================================================================================\n", + "micrographs | Micrographs accepted | exposure | micrograph_blob | micrograph_blob | ✕ \n", + " | | | ctf | ctf | ✓ \n", + " | | | ctf_stats | ctf_stats | ✓ \n", + " | | | rigid_motion | motion | ✓ \n", + " | | | spline_motion | motion | ✓ \n", + " | | | mscope_params | mscope_params | ✓ \n", + " | | | background_blob | stat_blob | ✓ \n", + " | | | micrograph_thumbnail_blob_1x | thumbnail_blob | ✓ \n", + " | | | micrograph_thumbnail_blob_2x | thumbnail_blob | ✓ \n", + " | | | micrograph_blob_non_dw | micrograph_blob | ✓ \n", + " | | | micrograph_blob_non_dw_AB | micrograph_blob | ✓ \n", + " | | | movie_blob | movie_blob | ✓ \n", + " | | | gain_ref_blob | gain_ref_blob | ✓ \n", + "particles | Particles accepted | particle | location | location | ✕ \n", 
+ " | | | ctf | ctf | ✓ \n", + " | | | pick_stats | pick_stats | ✓ \n" ] } ], @@ -1415,15 +1414,8 @@ { "data": { "text/plain": [ - "{'_id': '6560d183562b2c67c7d35754',\n", - " 'chunkSize': 2096128,\n", - " 'contentType': 'image/png',\n", - " 'filename': 'J96_extracted_coordinates_on_j2motioncorrected009270517818331954156_14sep05c_00024sq_00003hl_00002esframes_patch_aligned_doseweightedmrc.png',\n", - " 'job_uid': 'J96',\n", - " 'length': 867617,\n", - " 'md5': '471ab293b92726043c8277cb6964f70b',\n", - " 'project_uid': 'P251',\n", - " 'uploadDate': '2023-11-24T16:38:27.800000'}" + "('6786df603b5d82f98ab42232',\n", + " 'J478_extracted_coordinates_on_j357motioncorrected005581540094314066188_14sep05c_00024sq_00003hl_00002esframes_patch_aligned_doseweightedmrc.png')" ] }, "execution_count": 31, @@ -1433,7 +1425,7 @@ ], "source": [ "assets = job.list_assets()\n", - "assets[0]" + "assets[0].id, assets[0].filename" ] }, { @@ -1451,7 +1443,7 @@ { "data": { "text/plain": [ - "PosixPath('image.png')" + "'image.png'" ] }, "execution_count": 32, @@ -1460,7 +1452,7 @@ } ], "source": [ - "job.download_asset(assets[0][\"_id\"], \"image.png\")" + "job.download_asset(assets[0].id, \"image.png\")" ] }, { diff --git a/docs/intro.md b/docs/intro.md index 7179f17d..bee7ba51 100644 --- a/docs/intro.md +++ b/docs/intro.md @@ -23,7 +23,7 @@ Source code is [available on GitHub](https://github.com/cryoem-uoft/cryosparc-to ## Pre-requisites -- [Python ≥ 3.7](https://www.python.org/downloads/) +- [Python ≥ 3.8](https://www.python.org/downloads/) - [CryoSPARC ≥ v4.1](https://cryosparc.com/download) CryoSPARC installation must be accessible via one of the following methods: @@ -188,7 +188,6 @@ environment variables. 
To do the same, define `CRYOSPARC_LICENSE`, CryoSPARC license and login credentials: ```sh -CRYOSPARC_LICENSE_ID="xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx" \ CRYOSPARC_EMAIL="ali@example.com" \ CRYOSPARC_PASSWORD="password123" \ jupyter notebook --no-browser --ip=0.0.0.0 --port=8888 diff --git a/pyproject.toml b/pyproject.toml index dc64e932..4ca4dd5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "cryosparc-tools" version = "4.6.1" description = "Toolkit for interfacing with CryoSPARC" readme = "README.md" -requires-python = ">=3.7" +requires-python = ">=3.8" authors = [ { name = "Structura Biotechnology Inc.", email = "info@structura.bio" }, ] @@ -20,13 +20,17 @@ classifiers = [ "Topic :: Software Development :: Libraries", ] license = { file = "LICENSE" } -dependencies = ["numpy >= 1.15, < 3.0", "typing-extensions >= 3.7"] +dependencies = [ + "numpy >= 1.17, < 3.0", + "httpx ~= 0.25", + "pydantic ~= 2.8", + "typing-extensions >= 4.0", +] [project.optional-dependencies] dev = [ "build", "cython", - "httpretty", "pre-commit", "pyright", "pytest-benchmark", diff --git a/setup.py b/setup.py index 20e23659..3e25321f 100644 --- a/setup.py +++ b/setup.py @@ -30,11 +30,11 @@ headers=["cryosparc/include/cryosparc-tools/dataset.h"], ext_modules=cythonize( Extension( - name="cryosparc.core", + name="cryosparc.dataset.core", sources=[ "cryosparc/include/lz4/lib/lz4.c", - "cryosparc/dataset.c", - "cryosparc/core.pyx", + "cryosparc/dataset/dataset.c", + "cryosparc/dataset/core.pyx", ], include_dirs=["cryosparc/include/"], libraries=libraries, @@ -45,8 +45,8 @@ depends=[ "cryosparc/include/lz4/lib/lz4.h", "cryosparc/include/cryosparc-tools/dataset.h", - "cryosparc/lz4.pxd", - "cryosparc/dataset.pxd", + "cryosparc/dataset/lz4.pxd", + "cryosparc/dataset/dataset.pxd", ], ), language_level=3, diff --git a/tests/conftest.py b/tests/conftest.py index c0f45b34..4d04eaec 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,17 +1,30 @@ -import json import 
shutil import urllib.request -from io import BytesIO +from datetime import datetime, timezone from pathlib import Path from time import time -from typing import Any, Dict +from unittest import mock -import httpretty import numpy as n import pytest -from cryosparc.dataset import CSDAT_FORMAT, Row +from cryosparc.api import APIClient +from cryosparc.controllers.project import ProjectController from cryosparc.dataset import Dataset as BaseDataset +from cryosparc.dataset import Row +from cryosparc.models.job import Job +from cryosparc.models.job_spec import ( + Connection, + InputResult, + Inputs, + JobSpec, + Output, + OutputResult, + Outputs, + Params, +) +from cryosparc.models.project import Project +from cryosparc.models.user import Email, User from cryosparc.tools import CryoSPARC from cryosparc.util import default_rng @@ -98,225 +111,6 @@ def shuffle(self): # fmt: on -@pytest.fixture -def mock_jsonrpc_procs_core() -> Dict[str, Any]: - """ - Dictionary of JSON RPC method names and their return values. Can override - existing values in subfixtures. 
- """ - return { - "hello_world": {"hello": "world"}, - "get_running_version": "develop", - "get_id_by_email_password": "6372a35e821ed2b71d9fe4e3", - "get_job": { - "uid": "J1", - "project_uid": "P1", - "job_type": "homo_abinit", - "title": "New Job", - "description": "", - "created_by_user_id": "6372a35e821ed2b71d9fe4e3", - "output_results": [ - { - "uid": "J1-R3", - "type": "particle.blob", - "group_name": "particles_class_0", - "name": "blob", - "title": "Particle data", - "description": "Particle raw data", - "min_fields": [ - ["path", "O"], - ["idx", "u4"], - ["shape", "2u4"], - ["psize_A", "f4"], - ["sign", "f4"], - ["import_sig", "u8"], - ], - "versions": [0, 100, 200, 300, 400, 500, 600, 700, 800, 863], - "metafiles": [ - "J1/J1_class_00_00000_particles.cs", - "J1/J1_class_00_00100_particles.cs", - "J1/J1_class_00_00200_particles.cs", - "J1/J1_class_00_00300_particles.cs", - "J1/J1_class_00_00400_particles.cs", - "J1/J1_class_00_00500_particles.cs", - "J1/J1_class_00_00600_particles.cs", - "J1/J1_class_00_00700_particles.cs", - "J1/J1_class_00_00800_particles.cs", - "J1/J1_class_00_final_particles.cs", - ], - "num_items": [90, 9090, 12421, 12421, 12421, 12421, 12421, 12421, 12421, 12421], - "passthrough": False, - }, - { - "uid": "J1-R4", - "type": "particle.ctf", - "group_name": "particles_class_0", - "name": "ctf", - "title": "Particle CTF parameters", - "description": "Particle CTF parameters", - "min_fields": [ - ["type", "O"], - ["exp_group_id", "u4"], - ["accel_kv", "f4"], - ["cs_mm", "f4"], - ["amp_contrast", "f4"], - ["df1_A", "f4"], - ["df2_A", "f4"], - ["df_angle_rad", "f4"], - ["phase_shift_rad", "f4"], - ["scale", "f4"], - ["scale_const", "f4"], - ["shift_A", "2f4"], - ["tilt_A", "2f4"], - ["trefoil_A", "2f4"], - ["tetra_A", "4f4"], - ["anisomag", "4f4"], - ["bfactor", "f4"], - ], - "versions": [0, 100, 200, 300, 400, 500, 600, 700, 800, 863], - "metafiles": [ - "J1/J1_class_00_00000_particles.cs", - "J1/J1_class_00_00100_particles.cs", - 
"J1/J1_class_00_00200_particles.cs", - "J1/J1_class_00_00300_particles.cs", - "J1/J1_class_00_00400_particles.cs", - "J1/J1_class_00_00500_particles.cs", - "J1/J1_class_00_00600_particles.cs", - "J1/J1_class_00_00700_particles.cs", - "J1/J1_class_00_00800_particles.cs", - "J1/J1_class_00_final_particles.cs", - ], - "num_items": [90, 9090, 12421, 12421, 12421, 12421, 12421, 12421, 12421, 12421], - "passthrough": False, - }, - { # Empty to test a partially incomplete job - "uid": "J1-R7", - "type": "particle.pick_stats", - "group_name": "particles_class_0", - "name": "pick_stats", - "title": "Passthrough pick_stats", - "description": "Passthrough from input particles.pick_stats (result_name)", - "min_fields": [["ncc_score", "f4"], ["power", "f4"], ["template_idx", "u4"], ["angle_rad", "f4"]], - "versions": [], - "metafiles": [], - "num_items": [], - "passthrough": True, - }, - { - "uid": "J1-R8", - "type": "particle.location", - "group_name": "particles_class_0", - "name": "location", - "title": "Passthrough location", - "description": "Passthrough from input particles.location (result_name)", - "min_fields": [ - ["micrograph_uid", "u8"], - ["exp_group_id", "u4"], - ["micrograph_path", "O"], - ["micrograph_shape", "2u4"], - ["center_x_frac", "f4"], - ["center_y_frac", "f4"], - ], - "versions": [0], - "metafiles": ["J1/J1_passthrough_particles_class_0.cs"], - "num_items": [12421], - "passthrough": True, - }, - { - "uid": "J1-R9", - "type": "volume.blob", - "group_name": "volume_class_0", - "name": "map", - "title": "Volume data", - "description": "Volume raw data", - "min_fields": [["path", "O"], ["shape", "3u4"], ["psize_A", "f4"]], - "versions": [0, 100, 200, 300, 400, 500, 600, 700, 800, 862], - "metafiles": [ - "J1/J1_class_00_00000_volume.cs", - "J1/J1_class_00_00100_volume.cs", - "J1/J1_class_00_00200_volume.cs", - "J1/J1_class_00_00300_volume.cs", - "J1/J1_class_00_00400_volume.cs", - "J1/J1_class_00_00500_volume.cs", - "J1/J1_class_00_00600_volume.cs", - 
"J1/J1_class_00_00700_volume.cs", - "J1/J1_class_00_00800_volume.cs", - "J1/J1_class_00_final_volume.cs", - ], - "num_items": [1, 1, 1, 1, 1, 1, 1, 1, 1, 1], - "passthrough": False, - }, - ], - }, - "get_project_dir_abs": "/projects/my-project", - "get_project": {"uid": "P1", "title": "My Project"}, - "make_job": "J1", - "set_cluster_job_custom_vars": None, - "enqueue_job": "queued", - "job_send_streamlog": None, - "job_connect_group": True, - "job_set_param": True, - } - - -@pytest.fixture -def request_callback_core(mock_jsonrpc_procs_core): - def request_callback_core_fn(request, uri, response_headers): - body = json.loads(request.body) - mock_jsonrpc_procs_core["system.describe"] = {"procs": [{"name": m} for m in mock_jsonrpc_procs_core]} - response_headers["content-type"] = "application/json" - return [200, response_headers, json.dumps({"result": mock_jsonrpc_procs_core[body["method"]]})] - - return request_callback_core_fn - - -@pytest.fixture -def mock_jsonrpc_procs_vis() -> Dict[str, Any]: - return { - "hello_world": {"hello": "world"}, - } - - -@pytest.fixture -def request_callback_vis(mock_jsonrpc_procs_vis): - def request_callback_vis_fn(request, uri, response_headers): - body = json.loads(request.body) - - mock_jsonrpc_procs_vis["system.describe"] = {"procs": [{"name": m} for m in mock_jsonrpc_procs_vis]} - response_headers["content-type"] = "application/json" - return [200, response_headers, json.dumps({"result": mock_jsonrpc_procs_vis[body["method"]]})] - - return request_callback_vis_fn - - -def request_callback_vis_get_project_file(request, uri, response_headers): - body = json.loads(request.body) - data = b"" - dset = None - if body["project_uid"] == "P1" and body["path"] == "J1/J1_class_00_final_particles.cs": - dset = T20S_PARTICLES - elif body["project_uid"] == "P1" and body["path"] == "J1/J1_passthrough_particles_class_0.cs": - dset = T20S_PARTICLES_PASSTHROUGH - else: - raise RuntimeError(f"Unimplemented get_project_file pytest fixture for 
request body {body}") - - if dset: - bio = BytesIO() - dset.save(bio, format=CSDAT_FORMAT) - bio.seek(0) - data = bio.read() - - return [200, response_headers, data] - - -def request_callback_rtp(request, uri, response_headers): - body = json.loads(request.body) - procs: Dict[str, Any] = {"hello_world": {"hello": "world"}} - procs["system.describe"] = {"procs": [{"name": m} for m in procs]} - response_headers["content-type"] = "application/json" - return [200, response_headers, json.dumps({"result": procs[body["method"]]})] - - @pytest.fixture(scope="session") def big_dset_path(): basename = "bench_big_dset" @@ -427,21 +221,174 @@ def t20s_particles_passthrough(): @pytest.fixture -def cs(request_callback_core, request_callback_vis): - httpretty.enable(verbose=False, allow_net_connect=False) - httpretty.register_uri(httpretty.POST, "http://localhost:39002/api", body=request_callback_core) # type: ignore - httpretty.register_uri(httpretty.POST, "http://localhost:39003/api", body=request_callback_vis) # type: ignore - httpretty.register_uri( - httpretty.POST, - "http://localhost:39003/get_project_file", - body=request_callback_vis_get_project_file, # type: ignore +def mock_user(): + return User( + _id="6372a35e821ed2b71d9fe4e3", + name="test", + first_name="Testy", + last_name="Tester", + emails=[Email(address="test@example.com", verified=True)], + roles={"__global_roles__": ["admin"]}, + register_token=None, + allowed_prefix_dir="/", + created_at=datetime(2017, 1, 1, tzinfo=timezone.utc), ) - httpretty.register_uri(httpretty.POST, "http://localhost:39005/api", body=request_callback_rtp) # type: ignore - yield CryoSPARC(license="00000000-0000-0000-0000-000000000000", email="test@structura.bio", password="password") - httpretty.disable() - httpretty.reset() @pytest.fixture -def project(cs: CryoSPARC): +def cs(mock_user, monkeypatch): + monkeypatch.setattr(APIClient, "__call__", mock.Mock(return_value=None)) + APIClient.health = mock.Mock(return_value="OK") + 
APIClient.users = mock.MagicMock() + APIClient.config = mock.MagicMock() + APIClient.projects = mock.MagicMock() + APIClient.workspaces = mock.MagicMock() + APIClient.jobs = mock.MagicMock() + APIClient.users.me.return_value = mock_user + APIClient.config.get_version.return_value = "develop" + return CryoSPARC("https://cryosparc.example.com", email="structura@example.com", password="password") + + +@pytest.fixture +def mock_project(mock_user): + return Project( + _id="67292e95282b26b45d0e8fae", + uid="P1", + title="Test Project", + project_dir="/home/cryosparc/projects", + owner_user_id=mock_user.id, + size_last_updated=datetime.now(timezone.utc), + ) + + +@pytest.fixture +def project(cs: CryoSPARC, mock_project): + APIClient.projects.find_one.return_value = mock_project # type: ignore return cs.find_project("P1") + + +@pytest.fixture +def mock_new_job(mock_user, mock_project): + return Job( + _id="67743226e66c192db762b689", + uid="J42", + project_uid=mock_project.uid, + workspace_uids=["W1"], + job_dir="J42", + status="building", + created_by_user_id=mock_user.id, + spec=JobSpec( + type="homo_abinit", + params=Params(), + inputs=Inputs({"particles": []}), + outputs=Outputs( + { + "particles_class_0": Output( + type="particle", + results=[ + OutputResult(name="blob", dtype="blob"), + OutputResult(name="ctf", dtype="ctf"), + OutputResult(name="alignments3D", dtype="alignments3D"), + ], + ), + "volume_class_0": Output(type="volume", results=[OutputResult(name="map", dtype="blob")]), + } + ), + ), + ) + + +@pytest.fixture +def mock_params(): + return Params(abinit_K=1, generate_intermediate_results=True, random_seed=2056920808) + + +@pytest.fixture +def mock_new_job_with_params(mock_new_job: Job, mock_params: Params): + job = mock_new_job.model_copy(deep=True) + job.spec.params = mock_params + return job + + +@pytest.fixture +def mock_new_job_with_connection(mock_new_job_with_params: Job): + job = mock_new_job_with_params.model_copy(deep=True) + input_particles = 
Connection( + type="particle", + job_uid="J41", + output="particles", + results=[ + InputResult(name="blob", dtype="blob", job_uid="J42", output="particles", result="blob"), + InputResult(name="ctf", dtype="ctf", job_uid="J42", output="particles", result="ctf"), + # passthrough: + InputResult(name=None, dtype="location", job_uid="J42", output="particles", result="location"), + ], + ) + passthrough_result = OutputResult(name="location", dtype="location", passthrough=True) + job.spec.inputs.root["particles"] = [input_particles] + job.spec.outputs.root["particles_class_0"].results.append(passthrough_result) + return job + + +@pytest.fixture +def mock_job(mock_new_job_with_connection: Job): # completed + job = mock_new_job_with_connection.model_copy(update={"status": "completed"}, deep=True) + # fmt: off + output_particles_class_0 = Output( + type="particle", + results=[ + OutputResult( + name="blob", + dtype="blob", + versions=[0, 100, 200, 300, 400, 863], + metafiles=["J42/class_00_00000_particles.cs", "J42/class_00_00100_particles.cs", "J42/class_00_00200_particles.cs", "J42/class_00_00300_particles.cs", "J42/class_00_00400_particles.cs", "J42/class_00_final_particles.cs"], + num_items=[90, 9090, 10000, 10000, 10000, 10000], + ), + OutputResult( + name="ctf", + dtype="ctf", + versions=[0, 100, 200, 300, 400, 863], + metafiles=["J42/class_00_00000_particles.cs", "J42/class_00_00100_particles.cs", "J42/class_00_00200_particles.cs", "J42/class_00_00300_particles.cs", "J42/class_00_00400_particles.cs", "J42/class_00_final_particles.cs"], + num_items=[90, 9090, 10000, 10000, 10000, 10000], + ), + OutputResult( + name="alignments3D", + dtype="alignments3D", + versions=[0, 100, 200, 300, 400, 863], + metafiles=["J42/class_00_00000_particles.cs", "J42/class_00_00100_particles.cs", "J42/class_00_00200_particles.cs", "J42/class_00_00300_particles.cs", "J42/class_00_00400_particles.cs", "J42/class_00_final_particles.cs"], + num_items=[90, 9090, 10000, 10000, 10000, 10000], + 
), + OutputResult( + name="location", + dtype="location", + versions=[0], + metafiles=["J42/passthrough_particles_class_0.cs"], + num_items=[10000], + passthrough=True, + ), + ], + num_items=10000, + ) + output_volume_class_0 = Output( + type="volume", + results=[ + OutputResult( + name="map", + dtype="blob", + versions=[0, 100, 200, 300, 400, 862], + metafiles=["J42/class_00_00000_volume.cs", "J42/class_00_00100_volume.cs", "J42/class_00_00200_volume.cs", "J42/class_00_00300_volume.cs", "J42/class_00_00400_volume.cs", "J42/class_00_final_volume.cs"], + num_items=[1, 1, 1, 1, 1, 1], + ) + ], + num_items=1, + ) + # fmt: on + job.spec.outputs.root["particles_class_0"] = output_particles_class_0 + job.spec.outputs.root["volume_class_0"] = output_volume_class_0 + return job + + +@pytest.fixture +def job(cs: CryoSPARC, project: ProjectController, mock_job: Job): + APIClient.jobs.find_one.return_value = mock_job # type: ignore + return project.find_job("J42") diff --git a/tests/controllers/__init__.py b/tests/controllers/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/controllers/test_job.py b/tests/controllers/test_job.py new file mode 100644 index 00000000..a1a5a697 --- /dev/null +++ b/tests/controllers/test_job.py @@ -0,0 +1,172 @@ +import sys +from unittest import mock + +import pytest + +from cryosparc.api import APIClient +from cryosparc.controllers.job import ExternalJobController, JobController +from cryosparc.controllers.project import ProjectController +from cryosparc.models.job import Job +from cryosparc.models.job_spec import ( + JobSpec, + Output, + OutputResult, + OutputSlot, + OutputSpec, + Params, +) +from cryosparc.tools import CryoSPARC + +from ..conftest import T20S_PARTICLES + + +@pytest.fixture +def mock_enqueue_endpoint(mock_job: Job): + assert isinstance(endpoint := APIClient.jobs.enqueue, mock.Mock) + endpoint.return_value = mock_job.model_copy(update={"status": "queued"}) + return endpoint + + +def test_queue(job: 
JobController, mock_enqueue_endpoint: mock.Mock): + job.queue() + assert job.model.status == "queued" + mock_enqueue_endpoint.assert_called_once_with(job.project_uid, job.uid, lane=None, hostname=None, gpus=[]) + + +def test_queue_worker(job: JobController, mock_enqueue_endpoint: mock.Mock): + job.queue(lane="workers", hostname="worker1", gpus=[1]) + assert job.model.status == "queued" + mock_enqueue_endpoint.assert_called_once_with( + job.project_uid, job.uid, lane="workers", hostname="worker1", gpus=[1] + ) + + +def test_queue_cluster(job: JobController, mock_enqueue_endpoint: mock.Mock): + assert isinstance(mock_vars_endpoint := APIClient.jobs.set_cluster_custom_vars, mock.Mock) + vars = {"var1": 42, "var2": "test"} + job.queue(lane="cluster", cluster_vars=vars) + assert job.model.status == "queued" + mock_vars_endpoint.assert_called_once_with(job.project_uid, job.uid, vars) + mock_enqueue_endpoint.assert_called_once_with(job.project_uid, job.uid, lane="cluster", hostname=None, gpus=[]) + + +def test_load_output_all_slots(job: JobController, t20s_particles, t20s_particles_passthrough): + assert isinstance(mock_load_output_endpoint := APIClient.jobs.load_output, mock.Mock) + mock_load_output_endpoint.return_value = t20s_particles.innerjoin(t20s_particles_passthrough) + particles = job.load_output("particles_class_0") + assert set(particles.prefixes()) == {"location", "blob", "ctf"} + mock_load_output_endpoint.assert_called_once_with( + job.project_uid, job.uid, "particles_class_0", slots="all", version="F" + ) + + +def test_load_output_some_slots(job: JobController, t20s_particles, t20s_particles_passthrough): + assert isinstance(mock_load_output_endpoint := APIClient.jobs.load_output, mock.Mock) + mock_load_output_endpoint.return_value = t20s_particles.innerjoin(t20s_particles_passthrough) + slots = ["location", "blob", "ctf"] + particles = job.load_output("particles_class_0", slots=slots) + assert set(particles.prefixes()) == set(slots) + 
mock_load_output_endpoint.assert_called_once_with( + job.project_uid, job.uid, "particles_class_0", slots=slots, version="F" + ) + + +def test_job_subprocess_io(job: JobController): + assert isinstance(mock_log_endpoint := APIClient.jobs.add_event_log, mock.Mock) + + job.subprocess( + [sys.executable, "-c", 'import sys; print("hello"); print("error", file=sys.stderr); print("world")'] + ) + + assert len(mock_log_endpoint.mock_calls) == 7 # includes some prelude/divider calls + mock_log_endpoint.assert_has_calls( + [ + mock.call(job.project_uid, job.uid, "hello", type="text"), + mock.call(job.project_uid, job.uid, "error", type="text"), + mock.call(job.project_uid, job.uid, "world", type="text"), + ], + any_order=True, + ) + + +def test_create_external_job(cs: CryoSPARC, project: ProjectController, external_job: ExternalJobController): + assert project.uid == external_job.project_uid + assert isinstance(mock_create_endpoint := APIClient.jobs.create, mock.Mock) + mock_create_endpoint.assert_called_once_with( + project.uid, "W1", type="snowflake", title="Recenter Particles", description="" + ) + + +@pytest.fixture +def mock_external_job(mock_user, mock_project): + return Job( + _id="67292e95282b26b45d0e8fee", + uid="J43", + project_uid=mock_project.uid, + workspace_uids=["W1"], + job_dir="J43", + title="Recenter Particles", + status="building", + created_by_user_id=mock_user.id, + spec=JobSpec(type="snowflake", params=Params()), + ) + + +@pytest.fixture +def external_job(project: ProjectController, mock_external_job: Job): + APIClient.jobs.create.return_value = mock_external_job # type: ignore + return project.create_external_job("W1", title="Recenter Particles") + + +@pytest.fixture +def external_job_with_added_output(external_job: ExternalJobController, mock_external_job: Job): + mock_external_job = mock_external_job.model_copy(deep=True) + mock_external_job.spec.outputs.root["particles"] = Output( + type="particle", + results=[ + OutputResult(name="blob", 
dtype="blob"), + OutputResult(name="ctf", dtype="ctf"), + ], + ) + APIClient.jobs.add_output.return_value = mock_external_job # type: ignore + external_job.add_output("particle", name="particles", slots=["blob", "ctf"]) + return external_job + + +@pytest.fixture +def mock_external_job_with_saved_output(external_job_with_added_output: ExternalJobController, mock_external_job: Job): + metafile = f"{mock_external_job.uid}/particles.cs" + mock_external_job = mock_external_job.model_copy(deep=True) + mock_external_job.spec.outputs.root["particles"] = Output( + type="particle", + results=[ + OutputResult(name="blob", dtype="blob", versions=[0], metafiles=[metafile], num_items=[10]), + OutputResult(name="ctf", dtype="ctf", versions=[0], metafiles=[metafile], num_items=[10]), + ], + ) + APIClient.jobs.save_output.return_value = mock_external_job # type: ignore + external_job_with_added_output.save_output("particles", T20S_PARTICLES) + return external_job_with_added_output + + +def test_external_job_output(mock_external_job_with_saved_output: ExternalJobController): + assert isinstance(mock_add_output_endpoint := APIClient.jobs.add_external_output, mock.Mock) + assert isinstance(mock_save_output_endpoint := APIClient.jobs.save_output, mock.Mock) + j = mock_external_job_with_saved_output + + mock_add_output_endpoint.assert_called_once_with( + j.project_uid, + j.uid, + "particles", + OutputSpec( + type="particle", + title="particles", + slots=[OutputSlot(name="blob", dtype="blob"), OutputSlot(name="ctf", dtype="ctf")], + ), + ) + mock_save_output_endpoint.assert_called_once_with(j.project_uid, j.uid, "particles", T20S_PARTICLES, version=0) + + +def test_invalid_external_job_output(external_job): + with pytest.raises(ValueError, match="Invalid output name"): + external_job.add_output("particle", name="particles/1", slots=["blob", "ctf"]) diff --git a/tests/controllers/test_project.py b/tests/controllers/test_project.py new file mode 100644 index 00000000..c54822e2 --- 
/dev/null +++ b/tests/controllers/test_project.py @@ -0,0 +1,2 @@ +def test_project(project, mock_project): + assert project.uid == mock_project.uid diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 00000000..4ff07c11 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,5 @@ +from cryosparc.tools import CryoSPARC + + +def test_health(cs: CryoSPARC): + assert cs.api.health() == "OK" diff --git a/tests/test_command.py b/tests/test_command.py deleted file mode 100644 index 9cf2d55c..00000000 --- a/tests/test_command.py +++ /dev/null @@ -1,5 +0,0 @@ -from cryosparc.tools import CryoSPARC - - -def test_hello(cs: CryoSPARC): - assert cs.cli.hello_world() == {"hello": "world"} # type: ignore diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 7aa42b61..39c75bff 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -5,7 +5,7 @@ import pytest from cryosparc.dataset import CSDAT_FORMAT, Column -from cryosparc.row import Row +from cryosparc.dataset.row import Row from .conftest import Dataset diff --git a/tests/test_job.py b/tests/test_job.py deleted file mode 100644 index 57a99073..00000000 --- a/tests/test_job.py +++ /dev/null @@ -1,256 +0,0 @@ -import sys - -import httpretty -import pytest - -from cryosparc.dataset import Dataset -from cryosparc.job import ExternalJob, Job -from cryosparc.project import Project -from cryosparc.tools import CryoSPARC - -from .conftest import T20S_PARTICLES - - -@pytest.fixture -def job(cs, project: Project): - return project.find_job("J1") - - -@pytest.fixture -def mock_external_job_doc(): - return { - "_id": "67292e95282b26b45d0e8fee", - "uid": "J2", - "uid_num": 2, - "project_uid": "P1", - "project_uid_num": 1, - "type": "snowflake", - "job_type": "snowflake", - "title": "Recenter Particles", - "description": "Enter a description.", - "status": "building", - "created_at": "Mon, 04 Nov 2024 20:29:09 GMT", - "created_by_user_id": "61f0383552d791f286b796ef", - "parents": [], - "children": [], 
- "input_slot_groups": [], - "output_result_groups": [], - "output_results": [], - "params_base": {}, - "params_spec": {}, - "params_secs": {}, - "workspace_uids": ["W1"], - } - - -@pytest.fixture -def external_job( - mock_jsonrpc_procs_vis, - mock_jsonrpc_procs_core, - mock_external_job_doc, - cs: CryoSPARC, - project: Project, -): - mock_jsonrpc_procs_vis["create_external_job"] = "J2" - mock_jsonrpc_procs_core["get_job"] = mock_external_job_doc - cs.cli() - cs.vis() - return project.create_external_job("W1", title="Recenter Particles") - - -def test_queue(job: Job): - job.queue() - queue_request = httpretty.latest_requests()[-3] - refresh_request = httpretty.latest_requests()[-1] - assert queue_request.parsed_body["method"] == "enqueue_job" - assert queue_request.parsed_body["params"] == { - "project_uid": job.project_uid, - "job_uid": job.uid, - "lane": None, - "user_id": job.cs.user_id, - "hostname": None, - "gpus": False, - } - assert refresh_request.parsed_body["method"] == "get_job" - - -def test_queue_worker(job: Job): - job.queue(lane="workers", hostname="worker1", gpus=[1]) - queue_request = httpretty.latest_requests()[-3] - refresh_request = httpretty.latest_requests()[-1] - assert queue_request.parsed_body["method"] == "enqueue_job" - assert queue_request.parsed_body["params"] == { - "project_uid": job.project_uid, - "job_uid": job.uid, - "lane": "workers", - "user_id": job.cs.user_id, - "hostname": "worker1", - "gpus": [1], - } - assert refresh_request.parsed_body["method"] == "get_job" - - -def test_queue_cluster(job: Job): - vars = {"var1": 42, "var2": "test"} - job.queue(lane="cluster", cluster_vars=vars) - vars_request = httpretty.latest_requests()[-5] - queue_request = httpretty.latest_requests()[-3] - refresh_request = httpretty.latest_requests()[-1] - assert vars_request.parsed_body["method"] == "set_cluster_job_custom_vars" - assert vars_request.parsed_body["params"] == { - "project_uid": job.project_uid, - "job_uid": job.uid, - 
"cluster_job_custom_vars": vars, - } - assert queue_request.parsed_body["method"] == "enqueue_job" - assert queue_request.parsed_body["params"] == { - "project_uid": job.project_uid, - "job_uid": job.uid, - "lane": "cluster", - "user_id": job.cs.user_id, - "hostname": None, - "gpus": False, - } - assert refresh_request.parsed_body["method"] == "get_job" - - -def test_load_output_all_slots(job: Job): - output = job.load_output("particles_class_0") - assert set(output.prefixes()) == {"location", "blob", "ctf"} - - -def test_load_output_some_missing_slots(job: Job): - with pytest.raises( - ValueError, - match=( - "Cannot load output particles_class_0 slot pick_stats because " - "output does not have an associated dataset file. " - ), - ): - job.load_output("particles_class_0", slots=["blob", "pick_stats"]) - - -def test_load_output_some_slots(job: Job, t20s_particles, t20s_particles_passthrough): - particles = job.load_output("particles_class_0", slots=["location", "blob", "ctf"]) - assert particles == Dataset.innerjoin_many(t20s_particles, t20s_particles_passthrough) - - -def test_job_subprocess_io(job: Job): - job.subprocess( - [sys.executable, "-c", 'import sys; print("hello"); print("error", file=sys.stderr); print("world")'] - ) - - request = httpretty.latest_requests()[-3] # last two requests are "subprocess completed" log lines - body = request.parsed_body - assert body["method"] == "job_send_streamlog" - - # Lines may arrive out of order, either is okay - params = body["params"] - opt1 = {"project_uid": "P1", "job_uid": "J1", "message": "error", "error": False} - opt2 = {"project_uid": "P1", "job_uid": "J1", "message": "world", "error": False} - assert params == opt1 or params == opt2 - - -def test_create_external_job(cs: CryoSPARC, external_job: ExternalJob): - requests = httpretty.latest_requests() - create_external_job_request = requests[-3] - create_external_job_body = create_external_job_request.parsed_body - find_external_job_request = requests[-1] - 
find_external_job_body = find_external_job_request.parsed_body - - assert create_external_job_body["method"] == "create_external_job" - assert create_external_job_body["params"] == { - "project_uid": "P1", - "workspace_uid": "W1", - "user": cs.user_id, - "title": "Recenter Particles", - "desc": None, - } - assert find_external_job_body["method"] == "get_job" - assert find_external_job_body["params"] == ["P1", "J2"] - - -@pytest.fixture -def external_job_output(mock_jsonrpc_procs_vis, mock_external_job_doc, cs: CryoSPARC, external_job: ExternalJob): - mock_external_job_doc["output_result_groups"] = [ - { - "uid": "J2-G1", - "type": "particle", - "name": "particles", - "title": "Particles", - "description": "", - "contains": [ - { - "uid": "J2-R1", - "type": "particle.blob", - "group_name": "particles", - "name": "blob", - "passthrough": False, - }, - { - "uid": "J2-R2", - "type": "particle.ctf", - "group_name": "particles", - "name": "ctf", - "passthrough": False, - }, - ], - "passthrough": False, - } - ] - mock_external_job_doc["output_results"] = [ - { - "uid": "J2-R1", - "type": "particle.blob", - "group_name": "particles", - "name": "blob", - "title": "", - "description": "", - "min_fields": [["path", "O"], ["idx", "u4"], ["shape", "2u4"], ["psize_A", "f4"], ["sign", "f4"]], - "versions": [0], - "metafiles": ["J2/particles.cs"], - "num_items": [10], - "passthrough": False, - }, - { - "uid": "J2-R2", - "type": "particle.ctf", - "group_name": "particles", - "name": "ctf", - "title": "", - "description": "", - "min_fields": [["type", "O"], ["exp_group_id", "u4"], ["accel_kv", "f4"], ["cs_mm", "f4"]], - "versions": [0], - "metafiles": ["J2/particles.cs"], - "num_items": [10], - "passthrough": False, - }, - ] - mock_jsonrpc_procs_vis["add_external_job_output"] = "particles" - httpretty.register_uri( - httpretty.POST, - "http://localhost:39003/external/projects/P1/jobs/J2/outputs/particles/dataset", - body='"particles"', - ) - - cs.vis() - 
external_job.add_output("particle", name="particles", slots=["blob", "ctf"]) - external_job.save_output("particles", T20S_PARTICLES) - return T20S_PARTICLES - - -def test_external_job_output(external_job_output): - requests = httpretty.latest_requests() - create_output_request = requests[-3] - find_external_job_request = requests[-1] - find_external_job_body = find_external_job_request.parsed_body - - assert len(external_job_output) > 0 - assert create_output_request.url == "http://localhost:39003/external/projects/P1/jobs/J2/outputs/particles/dataset" - assert find_external_job_body["method"] == "get_job" - assert find_external_job_body["params"] == ["P1", "J2"] - - -def test_invalid_external_job_output(external_job): - with pytest.raises(ValueError, match="Invalid output name"): - external_job.add_output("particle", name="particles/1", slots=["blob", "ctf"]) diff --git a/tests/test_tools.py b/tests/test_tools.py index e39589e2..ede4bb04 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -1,58 +1,50 @@ -import httpretty +from unittest import mock -from cryosparc.job import Job -from cryosparc.project import Project +from cryosparc.api import APIClient +from cryosparc.controllers.job import Job, JobController +from cryosparc.controllers.project import Project +from cryosparc.models.job_spec import Params from cryosparc.tools import CryoSPARC -def test_create_job_basic(cs: CryoSPARC, project: Project): - job = cs.create_job(project.uid, "W1", "homo_abinit") - assert isinstance(job, Job) - assert job.uid == "J1" +def test_create_job_basic(cs: CryoSPARC, project: Project, mock_new_job: Job): + assert isinstance(mock_create_endpoint := APIClient.jobs.create, mock.Mock) + mock_create_endpoint.return_value = mock_new_job - latest_requests = httpretty.latest_requests() - create_job_request = latest_requests[-3] - get_job_request = latest_requests[-1] - assert create_job_request.parsed_body["method"] == "make_job" - assert 
create_job_request.parsed_body["params"] == { - "job_type": "homo_abinit", - "project_uid": project.uid, - "workspace_uid": "W1", - "user_id": cs.user_id, - "params": {}, - "input_group_connects": {}, - "title": None, - "desc": None, - } - assert get_job_request.parsed_body["method"] == "get_job" - assert get_job_request.parsed_body["params"] == ["P1", "J1"] + job = cs.create_job(project.uid, "W1", "homo_abinit") + assert isinstance(job, JobController) + assert job.uid == mock_new_job.uid + assert len(job.model.spec.params.model_dump(exclude_defaults=True, exclude_none=True)) == 0 + mock_create_endpoint.assert_called_once_with( + project.uid, "W1", type="homo_abinit", title="", description="", params={} + ) -def test_create_job_connect_params(cs: CryoSPARC, project: Project): +def test_create_job_connect_params( + cs: CryoSPARC, + project: Project, + mock_params: Params, + mock_new_job_with_params: Job, + mock_new_job_with_connection: Job, +): + assert isinstance(mock_create_endpoint := APIClient.jobs.create, mock.Mock) + assert isinstance(mock_connect_endpoint := APIClient.jobs.connect, mock.Mock) + mock_create_endpoint.return_value = mock_new_job_with_params + mock_connect_endpoint.return_value = mock_new_job_with_connection job = cs.create_job( project.uid, "W1", "homo_abinit", - connections={"particles": ("J2", "particles_selected")}, - params={"abinit_K": 3}, + connections={"particles": ("J41", "particles")}, + params=mock_params.model_dump(), + ) + assert isinstance(job, JobController) + assert job.uid == mock_new_job_with_connection.uid + assert job.model.spec.params == mock_params + assert len(job.model.spec.inputs.root["particles"]) == 1 + mock_create_endpoint.assert_called_once_with( + project.uid, "W1", type="homo_abinit", title="", description="", params=mock_params.model_dump() + ) + mock_connect_endpoint.assert_called_once_with( + project.uid, job.uid, "particles", source_job_uid="J41", source_output_name="particles" ) - assert isinstance(job, Job) - 
assert job.uid == "J1" - - latest_requests = httpretty.latest_requests() - create_job_request = latest_requests[-3] - get_job_request = latest_requests[-1] - - assert create_job_request.parsed_body["method"] == "make_job" - assert create_job_request.parsed_body["params"] == { - "job_type": "homo_abinit", - "project_uid": project.uid, - "workspace_uid": "W1", - "user_id": cs.user_id, - "params": {"abinit_K": 3}, - "input_group_connects": {"particles": ["J2.particles_selected"]}, - "title": None, - "desc": None, - } - assert get_job_request.parsed_body["method"] == "get_job" - assert get_job_request.parsed_body["params"] == ["P1", "J1"] diff --git a/typings/cryosparc/core.pyi b/typings/cryosparc/core.pyi deleted file mode 100644 index 6b1da4bf..00000000 --- a/typings/cryosparc/core.pyi +++ /dev/null @@ -1,45 +0,0 @@ -""" -This type stub file was generated by pyright. -""" - -from enum import Enum -from typing import Optional, Tuple - -from numpy.typing import NDArray - -__all__ = ["DsetType", "Stream", "Data"] - -class MemoryView: # Note: Supports buffer protocol. - base: "Array" - size: int - itemsize: int - nbytes: int - ndim: int - shape: Tuple[int, ...] - strides: Tuple[int, ...] - suboffsets: Tuple[int, ...] - T: "MemoryView" - def copy(self) -> "MemoryView": ... - def copy_fortran(self) -> "MemoryView": ... - def is_c_contig(self) -> bool: ... - def is_f_contig(self) -> bool: ... - -class Array: - memview: MemoryView - -class DsetType(int, Enum): - pass - -class Data: - pass - -class Stream: - def __init__(self, data: Data) -> None: ... - def cast_objs_to_strs(self) -> None: ... - def stralloc_col(self, col: str) -> Optional[Array]: ... - def compress_col(self, col: str) -> Array: ... - def compress_numpy(self, arr: NDArray) -> Array: ... - def compress(self, arr: Array) -> Array: ... - def decompress_col(self, col: str, data: bytes) -> Array: ... - def decompress_numpy(self, data: bytes, arr: NDArray) -> Array: ... 
- def decompress(self, data: bytes, outptr: int = 0) -> Array: ...