upgrade to altamisa v0.3, update list value support (#1789, #2033)

bihealth · Dec 19, 2024 · 08c516c · 08c516c
1 parent 279eba6
commit 08c516c
Show file tree

Hide file tree

Showing 16 changed files with 377 additions and 126 deletions.
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
@@ -33,6 +33,9 @@ Added
     - Missing assay plugin ``__init__.py`` files (#2014)
     - Study plugin override via ISA-Tab comments (#1885)
     - Token auth support in study plugin IGV XML serving views (#1999, #2021)
+    - Support for newlines in altamISA error messages (#2033)
+    - Support for comment, performer and contact field values as list (#1789, #2033)
+    - Support for numeric field values as list (#1789, #2033)
 - **Taskflowbackend**
     - ``TaskflowAPI.raise_submit_api_exception()`` helper (#1847)
 
@@ -47,6 +50,7 @@ Changed
     - Update ``Dockerfile`` for v1.0 upgrades (#2003, #2004)
     - Upgrade to iRODS v4.3.3 in CI (#1815)
     - Upgrade to python-irodsclient v2.2.0 (#2007, #2023)
+    - Upgrade to altamisa v0.3.0 (#2033)
     - Upgrade minimum supported iRODS version to v4.3.3 (#1815, #2007)
     - Use constants for timeline event status types (#2010)
 - **Irodsbackend**
@@ -69,6 +73,7 @@ Changed
     - Return ``503`` in ``IrodsCollsCreateAPIView`` if project is locked (#1847)
     - Return ``503`` in ``IrodsDataRequestAcceptAPIView`` if project is locked (#1847)
     - Return ``ProjectIrodsFileListAPIView`` results as list without ``irods_data`` object (#2040)
+    - Remove length limitation from ``Process.performer`` (#1789, #1942, #2033)
 - **Taskflowbackend**
     - Refactor task tests (#2002)
     - Unify user name parameter naming in flows (#1653)

diff --git a/docs_manual/source/app_samplesheets_edit.rst b/docs_manual/source/app_samplesheets_edit.rst
@@ -81,14 +81,14 @@ the value simply by typing in a standard spreadsheet fashion.
 
     String cell editing
 
-Special syntax is available for basic string values:
+Special syntax is available for basic string and/or numeric values:
 
 Value as List
     You can enter a list of values by separating single values with the
-    semicolon character (``;``).
+    semicolon character (``;``). This works for both string and numeric columns.
 Hyperlinks
     You can render the value as a hyperlink using the following syntax:
-    ``Link text <https://your.url>``
+    ``Link text <https://your.url>``. This is only allowed for string columns.
 
 There are also special cases for string input and specific columns:
 
@@ -103,7 +103,9 @@ Node Names
     orphaned files.
 Contacts
     Contact cells act as string cells with the following expected syntax:
-    ``Contact Name <email@example.com>``. The email can be omitted.
+    ``Contact Name <email@example.com>``. The email can be omitted. Multiple
+    contacts can be provided using the semicolon character as a delimiter. For
+    example: ``Contact1 <c1@example.com>;Contact2 <c2@example.com>``.
 Dates
     Date cells also provide standard string editing but enforce the ISO 8601
     ``YYYY-MM-DD`` syntax.

diff --git a/docs_manual/source/sodar_release_notes.rst b/docs_manual/source/sodar_release_notes.rst
@@ -19,6 +19,8 @@ Release for SODAR Core v1.0 upgrade, iRODS v4.3 upgrade and feature updates.
 - Add study plugin override via ISA-Tab comments
 - Add session control in Django settings and environment variables
 - Add token-based iRODS/IGV basic auth support for OIDC users
+- Add support for comment, performer and contact field values as list
+- Add support for numeric field values as list
 - Update minimum supported iRODS version to v4.3.3
 - Update REST API versioning
 - Update REST API views for OpenAPI support
@@ -29,14 +31,15 @@ Release for SODAR Core v1.0 upgrade, iRODS v4.3 upgrade and feature updates.
   client-side cert is not set
 - Upgrade to Postgres v16
 - Upgrade to python-irodsclient v2.2.0
+- Upgrade to altamISA v0.3.0
 - Upgrade to SODAR Core v1.0.3
-- `SODAR Core v1.0 updates <https://sodar-core.readthedocs.io/en/latest/major_changes.html#v1-0-3-2024-12-12>`_:
-  OIDC auth support, new REST API versioning, owner/delegate remote sync
-  controls, etc
 - Upgrade to Django v4.2
 - Remove Python v3.8 support
 - Remove Postgres <12 support
 - Remove iRODS <4.3 support
+- `SODAR Core v1.0 updates <https://sodar-core.readthedocs.io/en/latest/major_changes.html#v1-0-3-2024-12-12>`_:
+  OIDC auth support, new REST API versioning, owner/delegate remote sync
+  controls, etc
 
 :ref:`Administrator upgrade guide for v1.0 <admin_upgrade_v1.0>`
 

diff --git a/requirements/base.txt b/requirements/base.txt
@@ -87,7 +87,7 @@ celery==5.3.6
 ####################
 
 # CUBI AltamISA parser
-altamisa==0.2.9
+altamisa==0.3.0
 # -e git+https://github.com/bihealth/altamisa.git@f6de23aa6073d446f7c49ede0af7fa8729992209#egg=altamisa
 
 # Python iRODS client

diff --git a/samplesheets/io.py b/samplesheets/io.py
@@ -464,7 +464,7 @@ def _import_processes(
                 'protocol': protocol,
                 'assay': db_parent if isinstance(db_parent, Assay) else None,
                 'study': study,
-                'performer': p.performer,
+                'performer': ';'.join(p.performer) if p.performer else None,
                 'perform_date': p.date if p.date else None,
                 'array_design_ref': p.array_design_ref,
                 'first_dimension': (
@@ -539,7 +539,7 @@ def import_isa(
         :param save_isa: Save ISA-Tab as backup after importing (bool)
         :param from_template: Whether importing from a template (bool)
         :return: Investigation
-        :raise: SampleSheetExportException if critical warnings are raised
+        :raise: SampleSheetImportException if critical warnings are raised
         """
         t_start = time.time()
         logger.info('altamISA version: {}'.format(altamisa.__version__))
@@ -1070,7 +1070,11 @@ def _export_factor_vals(cls, factor_values):
         return tuple(
             isa_models.FactorValue(
                 name=k,
-                value=cls._export_val(v['value']),
+                value=(
+                    [cls._export_val(v['value'])]
+                    if not isinstance(v['value'], list)
+                    else cls._export_val(v['value'])
+                ),
                 unit=cls._export_val(v['unit']),
             )
             for k, v in factor_values.items()
@@ -1188,6 +1192,7 @@ def _export_processes(cls, processes):
                 perform_date = ''  # Empty string denotes an empty column
             else:
                 perform_date = p.perform_date
+            performer = p.performer.split(';') if p.performer else p.performer
             ret[p.unique_name] = isa_models.Process(
                 protocol_ref=(
                     p.protocol.name if p.protocol else PROTOCOL_UNKNOWN_NAME
@@ -1196,7 +1201,7 @@ def _export_processes(cls, processes):
                 name=p.name,
                 name_type=p.name_type,
                 date=perform_date,
-                performer=p.performer,
+                performer=performer,
                 parameter_values=cls._export_param_values(p.parameter_values),
                 comments=cls._export_comments(p.comments),
                 array_design_ref=p.array_design_ref,

diff --git a/samplesheets/migrations/0023_alter_process_performer.py b/samplesheets/migrations/0023_alter_process_performer.py
@@ -0,0 +1,20 @@
+# Generated by Django 4.2.16 on 2024-10-30 13:45
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ("samplesheets", "0022_update_igv_genome"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="process",
+            name="performer",
+            field=models.CharField(
+                blank=True, help_text="Process performer (optional)", null=True
+            ),
+        ),
+    ]
diff --git a/samplesheets/models.py b/samplesheets/models.py
@@ -944,7 +944,6 @@ class Process(NodeMixin, BaseSampleSheet):
 
     #: Process performer (optional)
     performer = models.CharField(
-        max_length=DEFAULT_LENGTH,
         unique=False,
         blank=True,
         null=True,

diff --git a/samplesheets/rendering.py b/samplesheets/rendering.py
@@ -394,9 +394,9 @@ def _add_annotation(self, ann, header, header_type, obj):
         :param obj: GenericMaterial or Process object the annotation belongs to
         """
         unit = None
-        # Special case: Comments as parsed in SODAR v0.5.2 (see #629)
+        # Special case: Comments as string or list
         # TODO: TBD: Should these be added in this function at all?
-        if isinstance(ann, str):
+        if isinstance(ann, (str, list)):
             val = ann
         # Ontology reference(s) (altamISA v0.1+, SODAR v0.5.2+)
         elif isinstance(ann['value'], dict) or (
@@ -483,14 +483,17 @@ def _get_length(value, col_type=None):
             return round(len(value) - nc - wc + 0.6 * nc + 1.3 * wc)
 
         def _is_num(value):
-            """Return whether a value contains an integer/double"""
-            if isinstance(value, str) and '_' in value:
-                return False  # HACK because float() accepts underscore
-            try:
-                float(value)
-                return True
-            except (ValueError, TypeError):
-                return False
+            """Return whether a value contains only integers/doubles"""
+            # Supports lists
+            values = value if isinstance(value, list) else [value]
+            for v in values:
+                if isinstance(v, str) and '_' in v:
+                    return False  # HACK because float() accepts underscore
+                try:
+                    float(v)
+                except (ValueError, TypeError):
+                    return False
+            return True
 
         top_idx = 0  # Top header index
         grp_idx = 0  # Index within current top header group
@@ -504,7 +507,6 @@ def _is_num(value):
                 and header_name not in th.PROCESS_NAME_HEADERS
                 and not self._field_configs[i]
                 and self._field_header[i]['col_type'] not in ['NUMERIC', 'UNIT']
-                and any(_is_num(x[i]['value']) for x in self._table_data)
                 and all(
                     (_is_num(x[i]['value']) or not x[i]['value'])
                     for x in self._table_data
@@ -527,33 +529,34 @@ def _is_num(value):
 
             col_type = self._field_header[i]['col_type']
             if col_type == 'CONTACT':
-                max_cell_len = max(
-                    [
-                        (
-                            _get_length(
-                                re.findall(link_re, x[i]['value'])[0][0]
-                            )
-                            if re.findall(link_re, x[i].get('value'))
-                            else len(x[i].get('value') or '')
-                        )
-                        for x in self._table_data
-                    ]
-                )
+                contact_vals = []
+                for x in self._table_data:
+                    if not x[i].get('value'):
+                        contact_vals.append('')
+                    elif isinstance(x[i]['value'], list):
+                        contact_vals.append('; '.join(x[i]['value']))
+                    else:
+                        contact_vals.append(x[i]['value'])
+                cell_lengths = [
+                    (
+                        _get_length(re.findall(link_re, x)[0][0])
+                        if re.findall(link_re, x)
+                        else len(x or '')
+                    )
+                    for x in contact_vals
+                ]
+                max_cell_len = max(cell_lengths)
             elif col_type == 'EXTERNAL_LINKS':  # Special case, count elements
                 header_len = 0  # Header length is not comparable
-                max_cell_len = max(
-                    [
-                        (
-                            len(x[i]['value'])
-                            if (
-                                x[i]['value']
-                                and isinstance(x[i]['value'], list)
-                            )
-                            else 0
-                        )
-                        for x in self._table_data
-                    ]
-                )
+                cell_lengths = [
+                    (
+                        len(x[i]['value'])
+                        if x[i]['value'] and isinstance(x[i]['value'], list)
+                        else 0
+                    )
+                    for x in self._table_data
+                ]
+                max_cell_len = max(cell_lengths)
             else:  # Generic type
                 max_cell_len = max(
                     [

diff --git a/samplesheets/tests/test_io.py b/samplesheets/tests/test_io.py
@@ -326,9 +326,8 @@ def setUp(self):
         )
         self.p_id = 'p{}'.format(self.project.pk)
 
-    def test_import_ref_val(self):
-        """Test _import_ref_val()"""
-        # Ontology value
+    def test_import_ref_val_ontology(self):
+        """Test _import_ref_val() with ontology value"""
         in_data = (
             self.isa_studies['s_BII-S-1.txt']
             .materials['{}-s0-source-culture1'.format(self.p_id)]
@@ -343,21 +342,15 @@ def test_import_ref_val(self):
         }
         self.assertEqual(out_data, expected)
 
-        # String value
-        in_data = (
-            self.isa_studies['s_BII-S-2.txt']
-            .materials[
-                '{}-s1-sample-NZ_4hrs_Grow1_Drug_Sample_1'.format(self.p_id)
-            ]
-            .factor_values[0]
-            .value
-        )
+    def test_import_ref_val_string(self):
+        """Test _import_ref_val() with string value"""
+        in_data = ' string '
         out_data = self.sheet_io._import_ref_val(in_data)
-        self.assertEqual(out_data, in_data)
+        self.assertEqual(out_data, in_data.strip())
 
-    def test_import_multi_val(self):
-        """Test _import_multi_val()"""
-        # List with a single ontology value (should return just a single dict)
+    def test_import_multi_val_single(self):
+        """Test _import_multi_val() with single ontology value"""
+        # Should return just a single dict
         in_data = (
             self.isa_studies['s_BII-S-1.txt']
             .materials['{}-s0-source-culture1'.format(self.p_id)]
@@ -372,9 +365,33 @@ def test_import_multi_val(self):
         }
         self.assertEqual(out_data, expected)
 
-        # TODO: List with multiple values (see issue #434)
+    def test_import_multi_val_list(self):
+        """Test _import_multi_val() with list of ontology values"""
+        in_data = [
+            isa_models.OntologyTermRef(
+                name='n1', accession='https://n1', ontology_name='TEST1'
+            ),
+            isa_models.OntologyTermRef(
+                name='n2', accession='https://n2', ontology_name='TEST2'
+            ),
+        ]
+        out_data = self.sheet_io._import_multi_val(in_data)
+        expected = [
+            {
+                'name': in_data[0].name,
+                'accession': in_data[0].accession,
+                'ontology_name': in_data[0].ontology_name,
+            },
+            {
+                'name': in_data[1].name,
+                'accession': in_data[1].accession,
+                'ontology_name': in_data[1].ontology_name,
+            },
+        ]
+        self.assertEqual(out_data, expected)
 
-        # Single ontology value
+    def test_import_multi_val_factor_value(self):
+        """Test _import_multi_val() with factor value"""
         in_data = (
             self.isa_studies['s_BII-S-1.txt']
             .materials['{}-s0-sample-C-0.07-aliquot9'.format(self.p_id)]
@@ -383,13 +400,14 @@ def test_import_multi_val(self):
         )
         out_data = self.sheet_io._import_multi_val(in_data)
         expected = {
-            'name': in_data.name,
-            'accession': in_data.accession,
-            'ontology_name': in_data.ontology_name,
+            'name': in_data[0].name,
+            'accession': in_data[0].accession,
+            'ontology_name': in_data[0].ontology_name,
         }
         self.assertEqual(out_data, expected)
 
-        # Ontology unit
+    def test_import_multi_val_ontology_unit(self):
+        """Test _import_multi_val() with ontology unit"""
         in_data = (
             self.isa_studies['s_BII-S-1.txt']
             .materials['{}-s0-sample-C-0.07-aliquot9'.format(self.p_id)]
@@ -595,8 +613,8 @@ def test_export_factors(self):
         }
         self.assertEqual(out_data, expected)
 
-    def test_export_factor_values(self):
-        """Test _export_factor_values()"""
+    def test_export_factor_vals(self):
+        """Test _export_factor_vals()"""
         study = self.investigation.studies.get(identifier='BII-S-1')
         in_data = study.materials.get(
             unique_name='{}-s0-sample-C-0.07-aliquot1'.format(self.p_id)
@@ -605,7 +623,7 @@ def test_export_factor_values(self):
         expected = tuple(
             isa_models.FactorValue(
                 name=k,
-                value=self.sheet_io._export_val(v['value']),
+                value=[self.sheet_io._export_val(v['value'])],
                 unit=self.sheet_io._export_val(v['unit']),
             )
             for k, v in in_data.items()