From 207d00a5c317e938ef107c944964659e15716758 Mon Sep 17 00:00:00 2001
From: Bhavin Katabathuni
Date: Wed, 29 Nov 2023 10:39:50 +0100
Subject: [PATCH] Data Models for db interaction

---
Reviewer notes (this section is ignored by `git am`):
 * Whitespace was reconstructed from a re-flowed copy of the patch; if a
   hunk is rejected on indentation, retry with
   `git apply --ignore-whitespace`.
 * The post-image blob ids on the `index` lines predate the review edits.

 ckanext/rdkit_visuals/models/base.py         |  10 +
 ckanext/rdkit_visuals/models/molecule_rel.py | 155 ++++++++++++++-----
 ckanext/rdkit_visuals/models/molecule_tab.py | 100 +++++++------
 3 files changed, 183 insertions(+), 82 deletions(-)
 create mode 100644 ckanext/rdkit_visuals/models/base.py

diff --git a/ckanext/rdkit_visuals/models/base.py b/ckanext/rdkit_visuals/models/base.py
new file mode 100644
index 0000000..7c2377a
--- /dev/null
+++ b/ckanext/rdkit_visuals/models/base.py
@@ -0,0 +1,10 @@
+# encoding: utf-8
+"""Shared declarative base for the rdkit_visuals models."""
+from sqlalchemy.ext.declarative import declarative_base
+
+from ckan.model import meta
+
+# Bind the base to CKAN's MetaData so that string foreign keys such as
+# ForeignKey('package.id') resolve against the core CKAN tables; a bare
+# declarative_base() creates a separate MetaData without a package table.
+Base = declarative_base(metadata=meta.metadata)
diff --git a/ckanext/rdkit_visuals/models/molecule_rel.py b/ckanext/rdkit_visuals/models/molecule_rel.py
index 85b7279..a5b9dd0 100644
--- a/ckanext/rdkit_visuals/models/molecule_rel.py
+++ b/ckanext/rdkit_visuals/models/molecule_rel.py
@@ -1,39 +1,120 @@
 # encoding: utf-8
 
-from six import text_type
-from sqlalchemy import orm, types, Column, Table, ForeignKey
-from sqlalchemy.ext.associationproxy import association_proxy
-
-from ckan.model import (
-    meta,
-    core,
-    package as _package,
-    extension,
-    domain_object,
-    types as _types,
-)
-
-__all__ = [u'MolecularRelationData', u'molecule_rel_data_table']
-
-molecule_rel_data_table = Table(u'molecule_rel_data', meta.metadata,
-        Column(u'id', types.Integer, primary_key=True, nullable=False),
-        Column(u'molecule_id', types.UnicodeText, ForeignKey('molecules.id'), nullable = False),
-        Column(u'package_id', types.UnicodeText, ForeignKey('package.id'), nullable=False),)
-
-
-
-class MolecularRelationData(domain_object.DomainObject):
-    def __init__(self, related_object):
-        self.package_id = related_object.get('package_id')
-        self.molecule_id = related_object.get('molecule_id')
-
-
-meta.mapper(
-    MolecularRelationData,
-    molecule_rel_data_table,
-    properties={
-        u"package": orm.relation(
-            _package, backref=orm.backref(u"molecules", cascade=u"all, delete, delete-orphan")
-        )
-    },
-)
+import datetime
+
+from sqlalchemy import Column, ForeignKey, String, distinct, func, orm
+from sqlalchemy import types as _types
+from sqlalchemy.orm import relationship
+
+from ckan.model import Package, Session, domain_object, meta
+
+from ckanext.rdkit_visuals.models.molecule_tab import Molecules
+from .base import Base
+
+
+class MolecularRelationData(Base):
+    """
+    Association table relating molecules to CKAN packages.
+
+    Each row stores the id of a molecule (``molecules.id``) together with
+    the id of a package (``package.id``) that the molecule belongs to, so
+    the packages of a molecule can be looked up with a single join.
+    """
+
+    __tablename__ = "molecule_rel_data"
+
+    id = Column(u'id', _types.Integer, primary_key=True, autoincrement=True, nullable=False)
+    # Foreign key columns use the same type as the column they reference:
+    # ``molecules.id`` is an Integer, CKAN's ``package.id`` is UnicodeText.
+    molecules_id = Column(u'molecules_id', _types.Integer, ForeignKey('molecules.id'), nullable=False)
+    package_id = Column(u'package_id', _types.UnicodeText, ForeignKey('package.id'), nullable=False)
+
+    @classmethod
+    def create(cls, molecules_id, package_id):
+        """
+        Create a new MolecularRelationData entry and store it in the database.
+
+        :param molecules_id: The ID of the molecule
+        :param package_id: The ID of the package
+        :return: The created MolecularRelationData instance
+        """
+        new_entry = cls(molecules_id=molecules_id, package_id=package_id)
+        Session.add(new_entry)
+        try:
+            Session.commit()
+        except Exception:
+            # Roll back so the shared scoped session stays usable.
+            Session.rollback()
+            raise
+        return new_entry
+
+    @classmethod
+    def get_package_list_inchi_key(cls, page_size, current_page):
+        """
+        Get one page of InChIKeys and their associated package IDs.
+
+        :param page_size: The number of records per page
+        :param current_page: The current page number (1-based)
+        :return: List of (inchi_key, package_ids) rows; package_ids is a
+            ', '-separated string of package IDs
+        """
+        # Clamp so a page number below 1 cannot produce a negative OFFSET.
+        offset_value = max(current_page - 1, 0) * page_size
+
+        subquery = Session.query(
+            cls.molecules_id,
+            func.string_agg(cls.package_id.cast(_types.String), ', ').label('package_ids')
+        ).group_by(cls.molecules_id).subquery()
+
+        # LIMIT/OFFSET paging is only stable with an explicit ordering.
+        query = Session.query(
+            Molecules.inchi_key,
+            subquery.c.package_ids
+        ).join(
+            Molecules,
+            Molecules.id == subquery.c.molecules_id
+        ).order_by(Molecules.id).limit(page_size).offset(offset_value)
+
+        return query.all()
+
+    @classmethod
+    def get_count_rows(cls):
+        """
+        Get the number of distinct molecules that are linked to a dataset.
+
+        :return: Number of molecules, used to display "n Molecules Found"
+        """
+        # Count the distinct ids directly; no DISTINCT ON subquery that
+        # also drags in the unused package_id column is needed.
+        return Session.query(func.count(distinct(cls.molecules_id))).scalar()
+
+    @classmethod
+    def get_mol_formula_by_package_id(cls, package_id):
+        """
+        Get the molecular formulas of the molecules linked to a package.
+
+        :param package_id: The ID of the package
+        :return: List of one-column rows, one (mol_formula,) per molecule
+        """
+        molecules_sub_query = Session.query(
+            cls.molecules_id
+        ).filter(cls.package_id == package_id).subquery()
+
+        mol_formula = Session.query(Molecules.mol_formula).filter(Molecules.id.in_(molecules_sub_query)).all()
+
+        return mol_formula
+
+    @classmethod
+    def get_exact_mass_by_package_id(cls, package_id):
+        """
+        Get the exact masses of the molecules linked to a package.
+
+        :param package_id: The ID of the package
+        :return: List of one-column rows, one (exact_mass,) per molecule
+        """
+        molecules_sub_query = Session.query(
+            cls.molecules_id
+        ).filter(cls.package_id == package_id).subquery()
+
+        exact_mass = Session.query(Molecules.exact_mass).filter(Molecules.id.in_(molecules_sub_query)).all()
+        return exact_mass
diff --git a/ckanext/rdkit_visuals/models/molecule_tab.py b/ckanext/rdkit_visuals/models/molecule_tab.py
index 6b43026..539d745 100644
--- a/ckanext/rdkit_visuals/models/molecule_tab.py
+++ b/ckanext/rdkit_visuals/models/molecule_tab.py
@@ -1,50 +1,60 @@
 # encoding: utf-8
 
-from six import text_type
-from sqlalchemy import orm, types, Column, Table, ForeignKey
-from sqlalchemy.ext.associationproxy import association_proxy
-
-from ckan.model import (
-    meta,
-    core,
-    package as _package,
-    extension,
-    domain_object,
-    types as _types,
-)
-
-
-__all__ = [u'MolecularData', u'molecule_data_table']
-
-molecule_data_table = Table(u'molecule_data', meta.metadata,
-        Column(u'id', types.Integer, primary_key = True, nullable = False),
-        Column(u'package_id', types.UnicodeText, ForeignKey('package.id'), nullable = False),
-        Column(u'inchi',types.UnicodeText),
-        Column(u'smiles',types.UnicodeText),
-        Column(u'inchi_key', types.UnicodeText),
-        Column(u'exact_mass', types.UnicodeText)
+from sqlalchemy import Column, Float, ForeignKey, String, func, orm
+from sqlalchemy import types as _types
+from sqlalchemy.orm import relationship
+
+from ckan.model import Package, Session, domain_object, meta
+
+from .base import Base
+
+
+class Molecules(Base):
+    """
+    Table storing the molecular information collected by RDKit visuals
+    while metadata is harvested through the CKAN harvesters.
+    """
+
+    __tablename__ = 'molecules'
+
+    id = Column(_types.Integer, primary_key=True, autoincrement=True)
+    # CKAN package ids are UUID strings stored as UnicodeText, so the
+    # referencing column has to be text as well, not Integer.
+    package_id = Column(_types.UnicodeText, ForeignKey('package.id'))
+    inchi = Column(_types.String)
+    smiles = Column(_types.String)
+    inchi_key = Column(_types.String)
+    exact_mass = Column(Float)
+    mol_formula = Column(_types.String)
+
+    @classmethod
+    def create(cls, package_id, inchi, smiles, inchi_key, exact_mass, mol_formula):
+        """
+        Create a new Molecules entry and store it in the database.
+
+        :param package_id: The ID of the package
+        :param inchi: InChI string for the molecule
+        :param smiles: SMILES string for the molecule
+        :param inchi_key: InChI key for the molecule
+        :param exact_mass: The exact mass of the molecule
+        :param mol_formula: The molecular formula of the molecule
+        :return: The created Molecules instance
+        """
+        new_molecule = cls(
+            package_id=package_id,
+            inchi=inchi,
+            smiles=smiles,
+            inchi_key=inchi_key,
+            exact_mass=exact_mass,
+            mol_formula=mol_formula
         )
+        Session.add(new_molecule)
+        try:
+            Session.commit()
+        except Exception:
+            # Roll back so the shared scoped session stays usable.
+            Session.rollback()
+            raise
+        return new_molecule
 
 
-
-class MolecularData(domain_object.DomainObject):
-    def __init__(self, related_object):
-        self.package_id = related_object.get('package_id')
-        self.inchi = related_object.get('inchi')
-        self.smiles = related_object.get('smiles')
-        self.inchi_key = related_object.get('inchi_key')
-        self.exact_mass = related_object.get('exact_mass')
-
-
-
-
-meta.mapper(
-    MolecularData,
-    molecule_data_table,
-    properties={
-        u"package": orm.relation(
-            _package, backref=orm.backref(u"molecule_data", cascade=u"all, delete, delete-orphan")
-        )
-    },
-)
-