Skip to content

Commit

Permalink
add missing stat() methods
Browse files Browse the repository at this point in the history
  • Loading branch information
ericvergnaud committed Sep 10, 2024
1 parent c531c3f commit 19fc07b
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 2 deletions.
17 changes: 16 additions & 1 deletion src/databricks/labs/blueprint/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import posixpath
import re
import shutil
import stat
from abc import abstractmethod
from collections.abc import Generator, Iterable, Sequence
from io import BytesIO, StringIO
Expand Down Expand Up @@ -121,7 +122,6 @@ class _DatabricksPath(Path, abc.ABC): # pylint: disable=too-many-public-methods
# Public APIs that we don't support.
as_uri = _na("as_uri")
cwd = _na("cwd")
stat = _na("stat")
chmod = _na("chmod")
lchmod = _na("lchmod")
lstat = _na("lstat")
Expand All @@ -138,6 +138,7 @@ def __new__(cls, *args, **kwargs):
# Force all initialisation to go via __init__() irrespective of the (Python-specific) base version.
return object.__new__(cls)

# pylint: disable=super-init-not-called
def __init__(self, ws: WorkspaceClient, *args: str | bytes | os.PathLike) -> None:
# We deliberately do _not_ call the super initializer because we're taking over complete responsibility for the
# implementation of the public API.
Expand Down Expand Up @@ -385,6 +386,7 @@ def with_suffix(self: P, suffix: str) -> P:
raise ValueError(msg)
return self.with_name(stem + suffix)

# pylint: disable=arguments-differ
def relative_to(self: P, *other: str | bytes | os.PathLike, walk_up: bool = False) -> P:
normalized = self.with_segments(*other)
if self.anchor != normalized.anchor:
Expand Down Expand Up @@ -691,6 +693,12 @@ def _file_info(self) -> FileInfo:
self._cached_file_info = self._ws.dbfs.get_status(self.as_posix())
return self._cached_file_info

def stat(self, *, follow_symlinks=True) -> os.stat_result:
    """Return an ``os.stat_result`` built from the DBFS file information of this path.

    Only the size and modification-time slots are populated; every other slot of the
    10-field result holds ``-1``.

    Args:
        follow_symlinks: Accepted so the signature matches ``Path.stat()``; it is not used here.

    Returns:
        An ``os.stat_result`` whose ``st_size`` and ``st_mtime`` mirror ``file_size`` and
        ``modification_time`` of the file information, or ``-1`` where the value is absent.
    """
    # Imported locally under their own names: inside a method that is itself called ``stat`` this keeps
    # the references to the module constants unambiguous.
    from stat import ST_MTIME, ST_SIZE  # pylint: disable=import-outside-toplevel

    info = self._file_info
    seq: list[float] = [-1] * 10
    # Test against None instead of truthiness so that an empty (0-byte) file reports size 0, not -1.
    seq[ST_SIZE] = -1 if info.file_size is None else info.file_size  # 6
    # NOTE(review): the timestamp is passed through unchanged; confirm whether the API reports
    # milliseconds, in which case callers expecting seconds in st_mtime need a conversion.
    seq[ST_MTIME] = -1 if info.modification_time is None else info.modification_time  # 8
    return os.stat_result(seq)

def is_dir(self) -> bool:
"""Return True if the path points to a DBFS directory."""
try:
Expand Down Expand Up @@ -841,6 +849,13 @@ def _object_info(self) -> ObjectInfo:
self._cached_object_info = self._ws.workspace.get_status(self.as_posix())
return self._object_info

def stat(self, *, follow_symlinks=True) -> os.stat_result:
    """Return an ``os.stat_result`` built from the workspace object information of this path.

    Only the size, modification-time and creation-time slots are populated; every other slot
    of the 10-field result holds ``-1``.

    Args:
        follow_symlinks: Accepted so the signature matches ``Path.stat()``; it is not used here.

    Returns:
        An ``os.stat_result`` whose ``st_size``, ``st_mtime`` and ``st_ctime`` mirror ``size``,
        ``modified_at`` and ``created_at`` of the object information, or ``-1`` where the value
        is absent.
    """
    # Imported locally under their own names: inside a method that is itself called ``stat`` this keeps
    # the references to the module constants unambiguous.
    from stat import ST_CTIME, ST_MTIME, ST_SIZE  # pylint: disable=import-outside-toplevel

    info = self._object_info
    seq: list[float] = [-1] * 10
    # Test against None instead of truthiness so that an empty (0-byte) object reports size 0, not -1.
    seq[ST_SIZE] = -1 if info.size is None else info.size  # 6
    # NOTE(review): timestamps are passed through unchanged; confirm whether the API reports
    # milliseconds, in which case callers expecting seconds need a conversion.
    seq[ST_MTIME] = -1 if info.modified_at is None else info.modified_at  # 8
    seq[ST_CTIME] = -1 if info.created_at is None else info.created_at  # 9
    return os.stat_result(seq)

def is_dir(self) -> bool:
"""Return True if the path points to a directory in Databricks Workspace."""
try:
Expand Down
24 changes: 23 additions & 1 deletion tests/unit/test_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@
from databricks.sdk import WorkspaceClient
from databricks.sdk.errors import NotFound, ResourceDoesNotExist
from databricks.sdk.mixins.workspace import WorkspaceExt
from databricks.sdk.service.files import FileInfo
from databricks.sdk.service.workspace import (
ImportFormat,
Language,
ObjectInfo,
ObjectType,
)

from databricks.labs.blueprint.paths import WorkspacePath
from databricks.labs.blueprint.paths import DBFSPath, WorkspacePath


def test_empty_init() -> None:
Expand Down Expand Up @@ -1007,3 +1008,24 @@ def test_rglob() -> None:
WorkspacePath(ws, "/test/path/dir1/file1.json"),
WorkspacePath(ws, "/test/path/dir2/file2.json"),
}


def test_workspace_path_stat_has_fields():
    """WorkspacePath.stat() exposes creation time, modification time and size from the object info."""
    object_info = ObjectInfo(created_at=1234, modified_at=2345, size=3456)
    # The mocked client hands back the prepared object info for any status lookup.
    client = create_autospec(WorkspaceClient)
    client.workspace.get_status.return_value = object_info

    result = WorkspacePath(client, "/test/path").stat()

    assert result.st_ctime == object_info.created_at
    assert result.st_mtime == object_info.modified_at
    assert result.st_size == object_info.size


def test_dbfs_path_stat_has_fields():
    """DBFSPath.stat() exposes modification time and size from the file info."""
    file_info = FileInfo(modification_time=2345, file_size=3456)
    # The mocked client hands back the prepared file info for any status lookup.
    client = create_autospec(WorkspaceClient)
    client.dbfs.get_status.return_value = file_info

    result = DBFSPath(client, "/test/path").stat()

    assert result.st_mtime == file_info.modification_time
    assert result.st_size == file_info.file_size

0 comments on commit 19fc07b

Please sign in to comment.