Skip to content

Commit

Permalink
Merge pull request #7 from heikomuller/dev-0.1.2
Browse files Browse the repository at this point in the history
Datetime handling and CLI
  • Loading branch information
heikomuller authored Jun 25, 2020
2 parents 4fe279b + e3dbad7 commit b7df4e5
Show file tree
Hide file tree
Showing 37 changed files with 1,431 additions and 87 deletions.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -101,5 +101,6 @@ ENV/
# mypy
.mypy_cache/

# Example archive files
# Ignore application and data files and folders
data/nyc-open-data/y43c-5n92/archive/
.histore/
8 changes: 8 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,11 @@
* Allow different types of input documents (e.g., CSV files or Json)
* External merge-sort for large CSV files.
* Add managers for maintaining sets of archives


### 0.1.2 - 06-25-2020

* Proper handling of date/time objects by the default archive reader and writer
* Optional arguments for Json encoder and decoder for persistent archives
* Add encoder and decoder information to archive manager metadata
* Simple command-line interface for persistent archive manager
10 changes: 8 additions & 2 deletions histore/archive/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ class PersistentArchive(Archive):
"""
def __init__(
self, basedir, replace=False, serializer=None, compression=None,
primary_key=None
primary_key=None, encoder=None, decoder=None
):
"""Initialize the associated archive store and the optional primary
key columns that are used to generate row identifier. If no primary
Expand All @@ -328,13 +328,19 @@ def __init__(
compressed. The metadata file is always stored as plain text.
primary_key: string or list
Column(s) that are used to generate identifier for snapshot rows.
encoder: json.JSONEncoder, default=None
Encoder used when writing archive rows as JSON objects to file.
decoder: func, default=None
Custom decoder function when reading archive rows from file.
"""
super(PersistentArchive, self).__init__(
store=ArchiveFileStore(
basedir=basedir,
replace=replace,
serializer=serializer,
compression=compression
compression=compression,
encoder=encoder,
decoder=decoder
),
primary_key=primary_key
)
51 changes: 48 additions & 3 deletions histore/archive/manager/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def archives(self):
Returns
-------
dict
dict(string: histore.archive.manager.descriptor.ArchiveDescriptor)
"""
raise NotImplementedError()

Expand All @@ -42,7 +42,10 @@ def contains(self, identifier):
return identifier in self.archives()

@abstractmethod
def create(self, name=None, description=None, primary_key=None):
def create(
self, name=None, description=None, primary_key=None, encoder=None,
decoder=None
):
"""Create a new archive object.
Parameters
Expand All @@ -54,6 +57,12 @@ def create(self, name=None, description=None, primary_key=None):
primary_key: string or list, default=None
Column(s) that are used to generate identifier for rows in the
archive.
encoder: string, default=None
Full package path for the Json encoder class that is used by the
persistent archive.
decoder: string, default=None
Full package path for the Json decoder function that is used by the
persistent archive.
Returns
-------
Expand Down Expand Up @@ -84,14 +93,32 @@ def get(self, identifier):
Returns
-------
histore.archive.vase.Archive
histore.archive.base.Archive
Raises
------
ValueError
"""
raise NotImplementedError()

def get_by_name(self, name):
"""Get descriptor for the archive with the given name. If no archive
with that name exists, None is returned.
Parameters
----------
name: string
Archive name
Returns
-------
histore.archive.manager.descriptor.ArchiveDescriptor or None
"""
# Linear scan over all maintained descriptors; the first descriptor whose
# name matches is returned.
for archive in self.archives().values():
if archive.name() == name:
return archive
return None

def list(self):
"""Get the list of descriptors for the maintained archives.
Expand All @@ -100,3 +127,21 @@ def list(self):
list(histore.archive.manager.descriptor.ArchiveDescriptor)
"""
return list(self.archives().values())

@abstractmethod
def rename(self, identifier, name):
"""Rename the specified archive. Raises a ValueError if the identifier
is unknown or if an archive with the given name exists.
Parameters
----------
identifier: string
Unique archive identifier
name: string
New archive name.
Raises
------
ValueError
"""
# Abstract method: this base implementation only raises.
raise NotImplementedError()
60 changes: 56 additions & 4 deletions histore/archive/manager/descriptor.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,17 @@
'type': 'object',
'properties': {
'id': {'type': 'string'},
'createdAt': {'type': 'string'},
'name': {'type': 'string'},
'description': {'type': 'string'},
'primaryKey': {
'type': 'array',
'items': {'type': 'string'}
}
},
'encoder': {'type': 'string'},
'decoder': {'type': 'string'}
},
'required': ['id']
'required': ['id', 'createdAt']
}


Expand Down Expand Up @@ -58,7 +61,10 @@ def __init__(self, doc, validate=True):
jsonschema.validate(instance=doc, schema=DESCRIPTOR_SCHEMA)

@staticmethod
def create(name=None, description=None, primary_key=None):
def create(
name=None, description=None, primary_key=None, encoder=None,
decoder=None
):
"""Create a new archive descriptor object.
Parameters
Expand All @@ -70,6 +76,11 @@ def create(name=None, description=None, primary_key=None):
primary_key: string or list, default=None
Column(s) that are used to generate identifier for rows in the
archive.
encoder: string, default=None
Full package path for the Json encoder class that is used by the
persistent archive.
decoder: string, default=None
Full package path for the Json decoder function that is used by the
persistent archive.
Returns
-------
Expand All @@ -81,15 +92,37 @@ def create(name=None, description=None, primary_key=None):
# Create a unique identifier for the new archive.
identifier = util.get_unique_identifier()
# Create the archive descriptor.
doc = {'id': identifier}
doc = {'id': identifier, 'createdAt': util.utc_now().isoformat()}
if name is not None:
doc['name'] = name
if description is not None:
doc['description'] = description
if primary_key is not None:
doc['primaryKey'] = primary_key
if encoder is not None:
doc['encoder'] = encoder
if decoder is not None:
doc['decoder'] = decoder
return ArchiveDescriptor(doc)

def created_at(self):
"""Get the creation timestamp for the archive. The value is read from
the 'createdAt' entry of the descriptor document and converted using
util.to_datetime.
Returns
-------
datetime.datetime
"""
return util.to_datetime(self.doc.get('createdAt'))

def decoder(self):
"""Get package path for Json decoder used by persistent archives. The
result is None if the descriptor has no 'decoder' entry.
Returns
-------
string
"""
return self.doc.get('decoder')

def description(self):
"""Get archive description. If the value is not set in the descriptor
an empty string is returned as default.
Expand All @@ -100,6 +133,15 @@ def description(self):
"""
return self.doc.get('description', '')

def encoder(self):
"""Get package path for Json encoder used by persistent archives. The
result is None if the descriptor has no 'encoder' entry.
Returns
-------
string
"""
return self.doc.get('encoder')

def identifier(self):
"""Get the unique archive identifier value.
Expand Down Expand Up @@ -127,3 +169,13 @@ def primary_key(self):
list(string)
"""
return self.doc.get('primaryKey')

def rename(self, name):
"""Update the name of the archive. Overwrites the 'name' entry of the
descriptor document in place.
Parameters
----------
name: string
New archive name.
"""
self.doc['name'] = name
Loading

0 comments on commit b7df4e5

Please sign in to comment.