Skip to content

Commit

Permalink
added missing doc strings, tidy a regex in mzIdParser, rename variable…
Browse files Browse the repository at this point in the history
… in schema_validate.py
  • Loading branch information
colin-combe committed Dec 11, 2024
1 parent 4ea4f32 commit bd35718
Show file tree
Hide file tree
Showing 26 changed files with 223 additions and 85 deletions.
9 changes: 9 additions & 0 deletions config/config_parser.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
"""
config_parser.py
"""
from configparser import ConfigParser
import os


def parse_config(filename, section='postgresql'):
"""
Parse database.ini file
:param filename:
:param section:
:return:
"""
# create a parser
parser = ConfigParser()
# read config file
Expand Down
3 changes: 3 additions & 0 deletions config/database.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
sessions used by sqlalchemy
"""
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from config_parser import get_conn_str
Expand Down
8 changes: 7 additions & 1 deletion config/index.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
"""
index.py
This file contains a helper function to get a database session
"""
from config.database import SessionLocal


# Helper function to get database session
def get_session():
"""
Helper function to get database session
"""
session = SessionLocal()
try:
yield session
Expand Down
4 changes: 4 additions & 0 deletions models/analysiscollectionspectrumidentification.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
This file contains the AnalysisCollectionSpectrumIdentification class,
which is a SQLAlchemy model for the analysiscollectionspectrumidentification table in the database.
"""
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import ForeignKey, Text, ForeignKeyConstraint, Integer, Any, JSON
from models.base import Base
Expand Down
3 changes: 3 additions & 0 deletions models/base.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
Base class for all models.
"""
from sqlalchemy.orm import DeclarativeBase


Expand Down
1 change: 1 addition & 0 deletions models/dbsequence.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""This file contains the DBSequence class, which is a SQLAlchemy model for the dbsequence table in the database."""
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import ForeignKey, Text, Integer
from models.base import Base
Expand Down
1 change: 1 addition & 0 deletions models/enzyme.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""This file contains the Enzyme class, which is a SQLAlchemy model for the enzyme table in the database."""
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import ForeignKey, Text, BOOLEAN, ForeignKeyConstraint, Integer
from models.base import Base
Expand Down
1 change: 1 addition & 0 deletions models/match.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""This file contains the Match class, which is a SQLAlchemy model for the match table in the database."""
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import ForeignKey, Text, FLOAT, JSON, BOOLEAN, Integer, ForeignKeyConstraint, CHAR, Index
from models.base import Base
Expand Down
4 changes: 4 additions & 0 deletions models/modifiedpeptide.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
This file contains the ModifiedPeptide class,
which is a SQLAlchemy model for the modifiedpeptide table in the database.
"""
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import ForeignKey, Text, Integer, JSON, FLOAT, Index
from models.base import Base
Expand Down
4 changes: 4 additions & 0 deletions models/peptideevidence.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""
This file contains the PeptideEvidence class,
which is a SQLAlchemy model for the peptideevidence table in the database.
"""
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import ForeignKey, Text, Integer, BOOLEAN, ForeignKeyConstraint, Index
from models.base import Base
Expand Down
2 changes: 1 addition & 1 deletion models/spectradata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import ForeignKey, Text, Integer, UniqueConstraint
from sqlalchemy import ForeignKey, Text, Integer
from models.base import Base


Expand Down
8 changes: 8 additions & 0 deletions parser/APIWriter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""APIWriter.py - Class for writing results via an API."""
import traceback
import requests
import json
Expand Down Expand Up @@ -117,6 +118,13 @@ def write_mzid_info(self, analysis_software_list, spectra_formats,
return None

def write_other_info(self, contains_crosslinks, upload_warnings, upload_id):
"""
Update Upload row with remaining info.
:param contains_crosslinks:
:param upload_warnings:
:param upload_id:
:return:
"""
response = None
try:
# todo: use urljoin
Expand Down
11 changes: 9 additions & 2 deletions parser/DatabaseWriter.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""DatabaseWriter class for writing results to a postgresql relational database."""
from sqlalchemy import create_engine, MetaData
from sqlalchemy import Table

Expand Down Expand Up @@ -42,6 +43,12 @@ def write_data(self, table, data):
conn.close()

def write_new_upload(self, table, data):
"""
Insert data into upload table and return the id of the new row.
:param table:
:param data:
:return:
"""
table = Table(table, self.meta, autoload_with=self.engine, quote=False)
with self.engine.connect() as conn:
statement = table.insert().values(data).returning(table.columns[0]) # RETURNING id AS upload_id
Expand All @@ -54,8 +61,6 @@ def write_mzid_info(self, analysis_software_list, spectra_formats,
provider, audits, samples, bib, upload_id):
"""
Update Upload row with mzid info.
ToDo: have this explicitly or create update func?
:param analysis_software_list: (list) List of analysis software used.
:param spectra_formats:
:param provider:
Expand All @@ -66,6 +71,7 @@ def write_mzid_info(self, analysis_software_list, spectra_formats,
:return:
"""
upload = Table("upload", self.meta, autoload_with=self.engine, quote=False)
# noinspection PyTypeChecker
stmt = upload.update().where(upload.c.id == str(upload_id)).values(
analysis_software_list=analysis_software_list,
spectra_formats=spectra_formats,
Expand All @@ -90,6 +96,7 @@ def write_other_info(self, contains_crosslinks, upload_warnings, upload_id):
"""
upload = Table("upload", self.meta, autoload_with=self.engine, quote=False)
with self.engine.connect() as conn:
# noinspection PyTypeChecker
stmt = upload.update().where(upload.c.id == str(upload_id)).values(
contains_crosslinks=contains_crosslinks,
upload_warnings=upload_warnings,
Expand Down
21 changes: 19 additions & 2 deletions parser/MzIdParser.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
"""
converts mzIdentML files to DB entries
"""
import base64
import gzip
import json
Expand All @@ -22,6 +25,7 @@


class MzIdParseException(Exception):
"""Exception raised when parsing mzIdentML files."""
pass


Expand Down Expand Up @@ -102,6 +106,10 @@ def parse(self):

@staticmethod
def check_spectra_data_validity(sp_datum):
"""
Check if the SpectraData element is valid.
:param sp_datum:
"""
# is there anything we'd like to complain about?
# SpectrumIDFormat
if 'SpectrumIDFormat' not in sp_datum or sp_datum['SpectrumIDFormat'] is None:
Expand Down Expand Up @@ -664,7 +672,7 @@ def main_loop(self):
main_loop_start_time = time()
self.logger.info('main loop - start')

msi_regex = re.compile(r'^([0-9]+)(?::(P|C))$')
msi_regex = re.compile(r'^([0-9]+):([PC])$')

spec_count = 0
spectra = []
Expand Down Expand Up @@ -814,6 +822,7 @@ def main_loop(self):

# noinspection PyBroadException
def upload_info(self):
"""write mzid file level info to the DB."""
upload_info_start_time = time()
self.logger.info('parse upload info - start')
self.mzid_reader.reset()
Expand Down Expand Up @@ -864,6 +873,9 @@ def upload_info(self):
round(time() - upload_info_start_time, 2)))

def fill_in_missing_scores(self):
"""
Legacy xiSPEC, ignore
"""
pass

def write_new_upload(self):
Expand Down Expand Up @@ -941,6 +953,11 @@ def get_cv_params(self, element, super_cls_accession=None):
# split into two functions
@staticmethod
def extract_mzid(archive):
"""
Extract the files from the archive.
:param archive:
:return:
"""
if archive.endswith('zip'):
zip_ref = zipfile.ZipFile(archive, 'r')
unzip_path = archive + '_unzip/'
Expand Down Expand Up @@ -1043,7 +1060,7 @@ def write_new_upload(self):
}
table = 'upload'

response = self.writer.write_data(table, upload_data)
self.writer.write_data(table, upload_data)
except SQLAlchemyError as e:
print(f"Error during database insert: {e}")

Expand Down
17 changes: 0 additions & 17 deletions parser/NumpyEncoder.py

This file was deleted.

14 changes: 14 additions & 0 deletions parser/SimpleFASTA.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
"""SimpleFASTA.py - Parse a FASTA file and return a dictionary of the sequences."""
import re


# todo: why aren't we using pyteomics? Probably we haven't looked at it yet because it's only used by the csv parsers, not mzid

# noinspection PyUnusedLocal
def get_db_sequence_dict(fasta_file_list):
"""
Parse a FASTA file and return a dictionary of the sequences.
:param fasta_file_list:
:return: dict
"""
db_sequence_dict = {}
identifier = None
sequence = ""
Expand Down Expand Up @@ -39,6 +45,14 @@ def get_db_sequence_dict(fasta_file_list):


def add_entry(identifier, sequence, description, seq_dict):
"""
Add an entry to the sequence dictionary.
:param identifier:
:param sequence:
:param description:
:param seq_dict:
:return: None
"""
m = re.search(r'..\|(.*)\|(.*)\s?', identifier)
# id = identifier
accession = identifier
Expand Down
30 changes: 29 additions & 1 deletion parser/Writer.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,54 @@
"""Writer.py - Abstract class for writing results to a database."""
from abc import ABC, abstractmethod


# Strategy interface
class Writer(ABC):
    """
    Interface for writing results to a database.

    Concrete writers subclass this and implement every abstract method below;
    the class itself cannot be instantiated.
    """

    def __init__(self, upload_id=None, pxid=None):
        """
        Store the identifiers shared by all writer implementations.

        :param upload_id: id of the upload this writer works on (default None).
        :param pxid: project identifier kept on the writer (default None);
            presumably a ProteomeXchange accession - TODO confirm.
        """
        self.pxid = pxid
        self.upload_id = upload_id

    @abstractmethod
    def write_data(self, table, data):
        """
        Insert data into table.

        :param table: name of the table to insert into.
        :param data: the data to insert.
        """
        pass

    @abstractmethod
    def write_new_upload(self, table, data):
        """
        Insert data into upload table and, if postgres, return the id of the new row.

        :param table: name of the upload table.
        :param data: the data for the new upload row.
        """
        pass

    @abstractmethod
    def write_mzid_info(self, analysis_software_list, spectra_formats,
                        provider, audits, samples, bib, upload_id):
        """
        Update the Upload row with mzid info.

        :param analysis_software_list: (list) List of analysis software used.
        :param spectra_formats:
        :param provider:
        :param audits:
        :param samples:
        :param bib:
        :param upload_id: id of the Upload row to update.
        """
        pass

    @abstractmethod
    def fill_in_missing_scores(self):
        """
        Legacy xiSPEC method, can be ignored.

        Left in place rather than creating a backwards compatibility issue for xiSPEC.
        todo - probably remove
        """
        pass
1 change: 1 addition & 0 deletions parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
"""parser module"""
from .csv_parser.FullCsvParser import *
from .csv_parser.LinksOnlyCsvParser import *
from .csv_parser.NoPeakListsCsvParser import *
Expand Down
Loading

0 comments on commit bd35718

Please sign in to comment.