Skip to content

Commit

Permalink
Add singularity-inspect metalad extractor
Browse files Browse the repository at this point in the history
Fixes: #198

- Adds "singularity inspect path/to/file.sing" to metadata
- Adds "apptainer --version || singularity version" to metadata
  • Loading branch information
asmacdo committed Mar 9, 2023
1 parent afc5486 commit ee00701
Show file tree
Hide file tree
Showing 6 changed files with 148 additions and 0 deletions.
5 changes: 5 additions & 0 deletions changelog.d/20230307_164111_austin_add_metalad_extractor.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
### 🚀 Enhancements and New Features

- Add metalad extractor using `singularity inspect`.
Fixes https://github.com/datalad/datalad-container/issues/198 via
https://github.com/datalad/datalad-container/pull/200 (by @asmacdo )
Empty file.
86 changes: 86 additions & 0 deletions datalad_container/extractors/metalad_container.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
# emacs: -*- mode: python; py-indent-offset: 4; tab-width: 4; indent-tabs-mode: nil -*-
# ex: set sts=4 ts=4 sw=4 noet:
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
#
#   See COPYING file distributed along with the datalad package for the
#   copyright and license terms.
#
# ## ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ##
"""Metalad file extractor reporting ``singularity inspect`` output for container images"""
import json
import logging
import subprocess
import time
from uuid import UUID

# Version string reported by MetaladContainer.get_version().
CURRENT_VERSION = "0.0.1"


# datalad-metalad supplies the extractor base class and result types used
# below.  A failed import simply propagates to the caller: the previous
# ``try: ... except Exception as e: raise`` wrapper re-raised unconditionally
# and therefore had no effect.
from datalad_metalad.extractors.base import (
    DataOutputCategory,
    ExtractorResult,
    FileMetadataExtractor,
)
from datalad_metalad import get_file_id

lgr = logging.getLogger('datalad.metadata.extractors.metalad_container')


class MetaladContainer(FileMetadataExtractor):
    """File-level metalad extractor for Singularity/Apptainer images.

    For the file described by ``self.file_info`` the extractor records the
    parsed output of ``singularity inspect --json <path>`` together with the
    version string reported by the locally installed container runtime.
    """

    def get_data_output_category(self) -> DataOutputCategory:
        """Return ``DataOutputCategory.IMMEDIATE``.

        The metadata is handed back directly inside the ``ExtractorResult``
        produced by :meth:`extract`.
        """
        return DataOutputCategory.IMMEDIATE

    def is_content_required(self) -> bool:
        """Return ``True``; the file path is passed to ``singularity inspect``."""
        return True

    def get_id(self) -> UUID:
        """Return the fixed UUID that identifies this extractor."""
        # Nothing special, made this up - asmacdo
        return UUID('3a28cca6-b7a1-11ed-b106-fc3497650c92')

    def get_version(self) -> str:
        """Return the extractor version (module-level ``CURRENT_VERSION``)."""
        return CURRENT_VERSION

    def extract(self, _=None) -> ExtractorResult:
        """Build the metadata record for ``self.file_info``.

        The positional argument is accepted but not used.

        Returns
        -------
        ExtractorResult
            A result flagged as successful whose ``immediate_data`` holds
            the file identifier, type, intra-dataset path, byte size, a
            timestamped comment, the container runtime version and the
            parsed ``singularity inspect`` output.

        Raises
        ------
        subprocess.CalledProcessError
            If ``singularity inspect`` or the version query exits non-zero.
        OSError
            If the ``singularity`` executable cannot be started.
        """
        file_info = self.file_info
        return ExtractorResult(
            extractor_version=self.get_version(),
            extraction_parameter=self.parameter or {},
            extraction_success=True,
            datalad_result_dict={
                "type": "container",
                "status": "ok"
            },
            immediate_data={
                "@id": get_file_id(dict(
                    path=file_info.path,
                    type=file_info.type)),
                "type": file_info.type,
                "path": file_info.intra_dataset_path,
                "content_byte_size": file_info.byte_size,
                "comment": f"Container metadata extractor executed at {time.time()}",
                "singularity_version": self._singularity_version(),
                "singularity_inspect": self._singularity_inspect(file_info.path),
            })

    @staticmethod
    def _capture_stdout(cmd) -> str:
        """Run ``cmd`` (an argument list) and return its decoded stdout.

        Raises ``subprocess.CalledProcessError`` on a non-zero exit status
        and ``OSError`` (e.g. ``FileNotFoundError``) when the executable
        cannot be started.
        """
        completed = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
        return completed.stdout.decode()

    def _singularity_inspect(self, path) -> dict:
        """Return the parsed JSON printed by ``singularity inspect --json path``."""
        return json.loads(
            self._capture_stdout(["singularity", "inspect", "--json", path]))

    def _singularity_version(self) -> str:
        """Return the version string of the installed container runtime.

        ``apptainer --version`` is tried first; if that executable cannot be
        started or exits non-zero, ``singularity version`` is used instead.
        A failure of the fallback command propagates to the caller.
        """
        try:
            # If this works, it is e.g. "apptainer version 1.1.5-1.fc37"
            return self._capture_stdout(["apptainer", "--version"]).strip()
        except (OSError, subprocess.CalledProcessError):
            # Only "apptainer is unavailable or failed" triggers the
            # fallback; unrelated errors are no longer swallowed here.
            # If this is not apptainer, it is e.g. "1.1.5-1.fc37"
            return self._capture_stdout(["singularity", "version"]).strip()
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ Documentation

changelog
acknowledgements
metadata-extraction


API Reference
Expand Down
53 changes: 53 additions & 0 deletions docs/source/metadata-extraction.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
Metadata Extraction
*******************

If the `datalad-metalad`_ extension is installed, `datalad-container` can
extract metadata from Singularity container images.

(It is recommended to use a tool like `jq` if you would like to read the
output yourself.)

For example:

``datalad meta-extract -d . metalad_container images/bids/bids-pymvpa--1.0.2.sing | jq``

.. code-block:: json

   {
     "type": "file",
     "dataset_id": "b02e63c2-62c1-11e9-82b0-52540040489c",
     "dataset_version": "9ed0a39406e518f0309bb665a99b64dec719fb08",
     "path": "images/bids/bids-pymvpa--1.0.2.sing",
     "extractor_name": "metalad_container",
     "extractor_version": "0.0.1",
     "extraction_parameter": {},
     "extraction_time": 1678225970.5466852,
     "agent_name": "Austin Macdonald",
     "agent_email": "[email protected]",
     "extracted_metadata": {
       "@id": "datalad:SHA1-s993116191--cc7ac6e6a31e9ac131035a88f699dfcca785b844",
       "type": "file",
       "path": "images/bids/bids-pymvpa--1.0.2.sing",
       "content_byte_size": 0,
       "comment": "Container metadata extractor executed at 1678225970.4338098",
       "singularity_version": "apptainer version 1.1.5-1.fc37",
       "singularity_inspect": {
         "data": {
           "attributes": {
             "labels": {
               "org.label-schema.build-date": "Thu,_19_Dec_2019_14:58:41_+0000",
               "org.label-schema.build-size": "2442MB",
               "org.label-schema.schema-version": "1.0",
               "org.label-schema.usage.singularity.deffile": "Singularity.bids-pymvpa--1.0.2",
               "org.label-schema.usage.singularity.deffile.bootstrap": "docker",
               "org.label-schema.usage.singularity.deffile.from": "bids/pymvpa:v1.0.2",
               "org.label-schema.usage.singularity.version": "2.5.2-feature-squashbuild-secbuild-2.5.6e68f9725"
             }
           }
         },
         "type": "container"
       }
     }
   }

.. _datalad-metalad: http://docs.datalad.org/projects/metalad/en/latest/
3 changes: 3 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,9 @@ datalad.extensions =
# valid datalad interface specification (see demo in this extensions)
container = datalad_container:command_suite

datalad.metadata.extractors =
metalad_container = datalad_container.extractors.metalad_container:MetaladContainer

[versioneer]
# See the docstring in versioneer.py for instructions. Note that you must
# re-run 'versioneer.py setup' after changing this section, and commit the
Expand Down

0 comments on commit ee00701

Please sign in to comment.