diff --git a/config/config_parser.py b/config/config_parser.py
index 763476b..43916d6 100644
--- a/config/config_parser.py
+++ b/config/config_parser.py
@@ -1,8 +1,17 @@
+"""
+config_parser.py
+"""
 from configparser import ConfigParser
 import os
 
 
 def parse_config(filename, section='postgresql'):
+    """
+    Parse the database.ini file.
+    :param filename: path to the .ini config file
+    :param section: section of the config file to read (default 'postgresql')
+    :return: dict of parameters from the given section
+    """
     # create a parser
     parser = ConfigParser()
     # read config file
diff --git a/config/database.py b/config/database.py
index 4ab0f2f..3cc2fe4 100644
--- a/config/database.py
+++ b/config/database.py
@@ -1,3 +1,6 @@
+"""
+Sessions used by SQLAlchemy.
+"""
 from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
 from config_parser import get_conn_str
diff --git a/config/index.py b/config/index.py
index b9929b7..44db643 100644
--- a/config/index.py
+++ b/config/index.py
@@ -1,8 +1,14 @@
+"""
+index.py
+This file contains a helper function to get a database session.
+"""
 from config.database import SessionLocal
 
 
-# Helper function to get database session
 def get_session():
+    """
+    Helper function to get a database session.
+    """
     session = SessionLocal()
     try:
         yield session
diff --git a/models/analysiscollectionspectrumidentification.py b/models/analysiscollectionspectrumidentification.py
index 64c4f6b..cb89240 100644
--- a/models/analysiscollectionspectrumidentification.py
+++ b/models/analysiscollectionspectrumidentification.py
@@ -1,3 +1,7 @@
+"""
+This file contains the AnalysisCollectionSpectrumIdentification class,
+which is a SQLAlchemy model for the analysiscollectionspectrumidentification table in the database.
+"""
 from sqlalchemy.orm import Mapped, mapped_column
 from sqlalchemy import ForeignKey, Text, ForeignKeyConstraint, Integer, Any, JSON
 from models.base import Base
diff --git a/models/base.py b/models/base.py
index fa2b68a..e0fd8ca 100644
--- a/models/base.py
+++ b/models/base.py
@@ -1,3 +1,6 @@
+"""
+Base class for all models.
+"""
 from sqlalchemy.orm import DeclarativeBase
 
 
diff --git a/models/dbsequence.py b/models/dbsequence.py
index ef26be3..6270e45 100644
--- a/models/dbsequence.py
+++ b/models/dbsequence.py
@@ -1,3 +1,4 @@
+"""This file contains the DBSequence class, which is a SQLAlchemy model for the dbsequence table in the database."""
 from sqlalchemy.orm import Mapped, mapped_column
 from sqlalchemy import ForeignKey, Text, Integer
 from models.base import Base
diff --git a/models/enzyme.py b/models/enzyme.py
index 2fa0f2e..deb8951 100644
--- a/models/enzyme.py
+++ b/models/enzyme.py
@@ -1,3 +1,4 @@
+"""This file contains the Enzyme class, which is a SQLAlchemy model for the enzyme table in the database."""
 from sqlalchemy.orm import Mapped, mapped_column
 from sqlalchemy import ForeignKey, Text, BOOLEAN, ForeignKeyConstraint, Integer
 from models.base import Base
diff --git a/models/match.py b/models/match.py
index 640e33c..514332e 100644
--- a/models/match.py
+++ b/models/match.py
@@ -1,3 +1,4 @@
+"""This file contains the Match class, which is a SQLAlchemy model for the match table in the database."""
 from sqlalchemy.orm import Mapped, mapped_column
 from sqlalchemy import ForeignKey, Text, FLOAT, JSON, BOOLEAN, Integer, ForeignKeyConstraint, CHAR, Index
 from models.base import Base
diff --git a/models/modifiedpeptide.py b/models/modifiedpeptide.py
index a6aa9a6..ce488e6 100644
--- a/models/modifiedpeptide.py
+++ b/models/modifiedpeptide.py
@@ -1,3 +1,7 @@
+"""
+This file contains the ModifiedPeptide class,
+which is a SQLAlchemy model for the modifiedpeptide table in the database.
+""" from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy import ForeignKey, Text, Integer, JSON, FLOAT, Index from models.base import Base diff --git a/models/peptideevidence.py b/models/peptideevidence.py index 10289e7..ca0af9c 100644 --- a/models/peptideevidence.py +++ b/models/peptideevidence.py @@ -1,3 +1,7 @@ +""" +This file contains the PeptideEvidence class, +which is a SQLAlchemy model for the peptideevidence table in the database. +""" from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy import ForeignKey, Text, Integer, BOOLEAN, ForeignKeyConstraint, Index from models.base import Base diff --git a/models/spectradata.py b/models/spectradata.py index b3b8e40..6c1b188 100644 --- a/models/spectradata.py +++ b/models/spectradata.py @@ -1,5 +1,5 @@ from sqlalchemy.orm import Mapped, mapped_column -from sqlalchemy import ForeignKey, Text, Integer, UniqueConstraint +from sqlalchemy import ForeignKey, Text, Integer from models.base import Base diff --git a/parser/APIWriter.py b/parser/APIWriter.py index 045765c..fa77f68 100644 --- a/parser/APIWriter.py +++ b/parser/APIWriter.py @@ -1,3 +1,4 @@ +"""APIWriter.py - Class for writing results via an API.""" import traceback import requests import json @@ -117,6 +118,13 @@ def write_mzid_info(self, analysis_software_list, spectra_formats, return None def write_other_info(self, contains_crosslinks, upload_warnings, upload_id): + """ + Update Upload row with remaining info. + :param contains_crosslinks: + :param upload_warnings: + :param upload_id: + :return: + """ response = None try: # todo: use urljoin diff --git a/parser/DatabaseWriter.py b/parser/DatabaseWriter.py index 39953e4..242eefb 100644 --- a/parser/DatabaseWriter.py +++ b/parser/DatabaseWriter.py @@ -1,3 +1,4 @@ +"""DatabaseWriter class for writing results to a postgresql relational database.""" from sqlalchemy import create_engine, MetaData from sqlalchemy import Table @@ -42,6 +43,12 @@ def write_data(self, table, data): conn.close() def write_new_upload(self, table, data): + """ + Insert data into upload table and return the id of the new row. + :param table: + :param data: + :return: + """ table = Table(table, self.meta, autoload_with=self.engine, quote=False) with self.engine.connect() as conn: statement = table.insert().values(data).returning(table.columns[0]) # RETURNING id AS upload_id @@ -54,8 +61,6 @@ def write_mzid_info(self, analysis_software_list, spectra_formats, provider, audits, samples, bib, upload_id): """ Update Upload row with mzid info. - - ToDo: have this explicitly or create update func? :param analysis_software_list: (list) List of analysis software used. 
         :param spectra_formats:
         :param provider:
@@ -66,6 +71,7 @@ def write_mzid_info(self, analysis_software_list, spectra_formats,
         :return:
         """
         upload = Table("upload", self.meta, autoload_with=self.engine, quote=False)
+        # noinspection PyTypeChecker
         stmt = upload.update().where(upload.c.id == str(upload_id)).values(
             analysis_software_list=analysis_software_list,
             spectra_formats=spectra_formats,
@@ -90,6 +96,7 @@ def write_other_info(self, contains_crosslinks, upload_warnings, upload_id):
         """
         upload = Table("upload", self.meta, autoload_with=self.engine, quote=False)
         with self.engine.connect() as conn:
+            # noinspection PyTypeChecker
             stmt = upload.update().where(upload.c.id == str(upload_id)).values(
                 contains_crosslinks=contains_crosslinks,
                 upload_warnings=upload_warnings,
diff --git a/parser/MzIdParser.py b/parser/MzIdParser.py
index d542dc1..d82fb46 100644
--- a/parser/MzIdParser.py
+++ b/parser/MzIdParser.py
@@ -1,3 +1,6 @@
+"""
+Converts mzIdentML files to DB entries.
+"""
 import base64
 import gzip
 import json
@@ -22,6 +25,7 @@
 
 
 class MzIdParseException(Exception):
+    """Exception raised on errors when parsing mzIdentML files."""
     pass
 
 
@@ -102,6 +106,10 @@ def parse(self):
 
     @staticmethod
     def check_spectra_data_validity(sp_datum):
+        """
+        Check if the SpectraData element is valid.
+        :param sp_datum:
+        """
         # is there anything we'd like to complain about?
         # SpectrumIDFormat
         if 'SpectrumIDFormat' not in sp_datum or sp_datum['SpectrumIDFormat'] is None:
@@ -664,7 +672,7 @@ def main_loop(self):
         main_loop_start_time = time()
         self.logger.info('main loop - start')
 
-        msi_regex = re.compile(r'^([0-9]+)(?::(P|C))$')
+        msi_regex = re.compile(r'^([0-9]+):([PC])$')
 
         spec_count = 0
         spectra = []
@@ -814,6 +822,7 @@ def main_loop(self):
 
     # noinspection PyBroadException
     def upload_info(self):
+        """Write mzid file-level info to the DB."""
         upload_info_start_time = time()
         self.logger.info('parse upload info - start')
         self.mzid_reader.reset()
@@ -864,6 +873,9 @@ def upload_info(self):
                 round(time() - upload_info_start_time, 2)))
 
     def fill_in_missing_scores(self):
+        """
+        Legacy xiSPEC, ignore.
+        """
         pass
 
     def write_new_upload(self):
@@ -941,6 +953,11 @@ def get_cv_params(self, element, super_cls_accession=None):
     # split into two functions
     @staticmethod
    def extract_mzid(archive):
+        """
+        Extract the files from the archive.
+        :param archive:
+        :return:
+        """
         if archive.endswith('zip'):
             zip_ref = zipfile.ZipFile(archive, 'r')
             unzip_path = archive + '_unzip/'
@@ -1043,7 +1060,7 @@ def write_new_upload(self):
             }
 
             table = 'upload'
-            response = self.writer.write_data(table, upload_data)
+            self.writer.write_data(table, upload_data)
 
         except SQLAlchemyError as e:
             print(f"Error during database insert: {e}")
diff --git a/parser/NumpyEncoder.py b/parser/NumpyEncoder.py
deleted file mode 100644
index cb69815..0000000
--- a/parser/NumpyEncoder.py
+++ /dev/null
@@ -1,17 +0,0 @@
-import json
-
-
-class NumpyEncoder(json.JSONEncoder):
-    # def default(self, obj):
-    #     if isinstance(obj, np.ndarray):
-    #         return obj.tolist()
-    #     return json.JSONEncoder.default(self, obj)
-    def default(self, o):
-        try:
-            iterable = iter(o)
-        except TypeError:
-            pass
-        else:
-            return list(iterable)
-        # Let the base class default method raise the TypeError
-        return json.JSONEncoder.default(self, o)
diff --git a/parser/SimpleFASTA.py b/parser/SimpleFASTA.py
index e7083e1..3378c43 100644
--- a/parser/SimpleFASTA.py
+++ b/parser/SimpleFASTA.py
@@ -1,3 +1,4 @@
+"""SimpleFASTA.py - Parse FASTA files and return a dictionary of the sequences."""
 import re
 
 
@@ -5,6 +6,11 @@
 
 # noinspection PyUnusedLocal
 def get_db_sequence_dict(fasta_file_list):
+    """
+    Parse FASTA files and return a dictionary of the sequences.
+    :param fasta_file_list: list of FASTA file paths
+    :return: dict
+    """
     db_sequence_dict = {}
     identifier = None
     sequence = ""
@@ -39,6 +45,14 @@
 
 
 def add_entry(identifier, sequence, description, seq_dict):
+    """
+    Add an entry to the sequence dictionary.
+    :param identifier:
+    :param sequence:
+    :param description:
+    :param seq_dict:
+    :return: None
+    """
     m = re.search(r'..\|(.*)\|(.*)\s?', identifier)
     # id = identifier
     accession = identifier
diff --git a/parser/Writer.py b/parser/Writer.py
index 0001720..71ea36e 100644
--- a/parser/Writer.py
+++ b/parser/Writer.py
@@ -1,26 +1,54 @@
+"""Writer.py - Abstract class for writing results to a database."""
 from abc import ABC, abstractmethod
 
 
 # Strategy interface
 class Writer(ABC):
-
+    """
+    Interface for writing results to a database.
+    """
     def __init__(self, upload_id=None, pxid=None):
         self.pxid = pxid
         self.upload_id = upload_id
 
     @abstractmethod
     def write_data(self, table, data):
+        """
+        Insert data into the given table.
+        :param table:
+        :param data:
+        """
         pass
 
     @abstractmethod
     def write_new_upload(self, table, data):
+        """
+        Insert data into the upload table and, if Postgres, return the id of the new row.
+        :param table:
+        :param data:
+        """
         pass
 
     @abstractmethod
     def write_mzid_info(self, analysis_software_list, spectra_formats,
                         provider, audits, samples, bib, upload_id):
+        """
+        Update the Upload row with the mzid info.
+        :param analysis_software_list:
+        :param spectra_formats:
+        :param provider:
+        :param audits:
+        :param samples:
+        :param bib:
+        :param upload_id:
+        """
         pass
 
     @abstractmethod
     def fill_in_missing_scores(self):
+        """
+        Legacy xiSPEC thing, can be ignored;
+        left in rather than creating a backwards-compatibility issue for xiSPEC.
+        todo - probably remove
+        """
         pass
diff --git a/parser/__init__.py b/parser/__init__.py
index 52b7fef..f739656 100644
--- a/parser/__init__.py
+++ b/parser/__init__.py
@@ -1,3 +1,4 @@
+"""parser module"""
 from .csv_parser.FullCsvParser import *
 from .csv_parser.LinksOnlyCsvParser import *
 from .csv_parser.NoPeakListsCsvParser import *
diff --git a/parser/csv_parser/AbstractCsvParser.py b/parser/csv_parser/AbstractCsvParser.py
index 62bd55b..cc87729 100644
--- a/parser/csv_parser/AbstractCsvParser.py
+++ b/parser/csv_parser/AbstractCsvParser.py
@@ -1,3 +1,4 @@
+"""Abstract class for csv parsers."""
 import abc
 import os
 from time import time
@@ -18,22 +19,34 @@ class CsvParseException(Exception):
 
 
 class MissingFileException(Exception):
+    """
+    Exception raised for missing files.
+    todo - reuse other exception?
+    """
     pass
 
 
 class AbstractCsvParser(abc.ABC):
     """
-
+    Abstract class for csv parsers.
     """
     @property
     @abc.abstractmethod
     def required_cols(self):
+        """
+        Get required column names in the csv file.
+        :return: list of strings
+        """
         pass
 
     @property
     @abc.abstractmethod
     def optional_cols(self):
+        """
+        Get optional column names in the csv file.
+        :return: list of strings
+        """
         pass
 
     default_values = {
@@ -129,12 +142,22 @@ def __init__(self, csv_path, temp_dir, peak_list_dir, writer, logger):
         # self.csv_reader.fillna('Null', inplace=True)
 
     def check_required_columns(self):
+        """
+        Check if all required columns are present in the csv file.
+        todo - return type / raising exception is not consistent
+        :return: bool
+        :raises CsvParseException: if a required column is missing
+        """
         for required_col in self.required_cols:
             if required_col not in self.csv_reader.columns:
                 raise CsvParseException("Required csv column %s missing" % required_col)
         return True
 
     def get_missing_required_columns(self):
+        """
+        Get missing required columns in the csv file.
+        :return: list of strings
+        """
         missing_cols = []
         for required_col in self.required_cols:
             if required_col not in self.csv_reader.columns:
@@ -149,6 +172,9 @@ def get_peak_list_file_names(self):
         return self.csv_reader.peaklistfilename.unique()
 
     def get_sequence_db_file_names(self):
+        """
+        :return: list of all used sequence db file names
+        """
         fasta_files = []
         for file in os.listdir(self.temp_dir):
             if file.endswith(".fasta") or file.endswith(".FASTA"):
@@ -202,7 +228,9 @@ def set_peak_list_readers(self):
         self.peak_list_readers = peak_list_readers
 
     def parse(self):
-
+        """
+        Parse the csv file.
+        """
         start_time = time()
 
         # ToDo: more gracefully handle missing files
@@ -224,6 +252,9 @@ def parse(self):
 
     @abc.abstractmethod
     def main_loop(self):
+        """
+        Main loop for parsing the csv.
+        """
         pass
 
     # @staticmethod
@@ -243,12 +274,18 @@ def main_loop(self):
     #     return masses
 
     def parse_db_sequences(self):
+        """
+        Parse db sequences.
+        """
         self.logger.info('reading fasta - start')
         self.start_time = time()
         self.fasta = SimpleFASTA.get_db_sequence_dict(self.get_sequence_db_file_names())
         self.logger.info('reading fasta - done. Time: ' + str(round(time() - self.start_time, 2)) + " sec")
 
     def upload_info(self):
+        """
+        Write new upload to database.
+ """ self.logger.info('new csv upload') # # ident_file_size = os.path.getsize(self.csv_path) # # peak_list_file_names = json.dumps(self.get_peak_list_file_names(), cls=NumpyEncoder) @@ -260,7 +297,9 @@ def upload_info(self): # self.writer.write_mzid_info(spectra_formats, provider, audits, samples, bib_refs) def write_new_upload(self): - """Write new upload.""" + """Write new upload todatabase. + :raises Exception: if there is an error writing to the database. + """ upload_data = { # 'id': self.writer.upload_id, # 'user_id': self.writer.user_id, diff --git a/parser/csv_parser/FullCsvParser.py b/parser/csv_parser/FullCsvParser.py index 68a1018..ce45483 100644 --- a/parser/csv_parser/FullCsvParser.py +++ b/parser/csv_parser/FullCsvParser.py @@ -1,3 +1,4 @@ +"""""" from .AbstractCsvParser import AbstractCsvParser, CsvParseException from time import time import re diff --git a/parser/database/create_db_schema.py b/parser/database/create_db_schema.py index 76f2509..bd1e044 100644 --- a/parser/database/create_db_schema.py +++ b/parser/database/create_db_schema.py @@ -1,5 +1,7 @@ -import logging.config - +""" +create_db_schema.py +This script creates a database and schema for the application. +""" from sqlalchemy import create_engine from sqlalchemy_utils import database_exists, drop_database, create_database @@ -9,20 +11,34 @@ def create_db(connection_str): + """ + Create a database if it doesn't exist. + :param connection_str: + :return: None + """ engine = create_engine(connection_str) if not database_exists(engine.url): create_database(engine.url) def drop_db(connection_str): + """ + Drop a database if it exists. + :param connection_str: + :return: None + """ engine = create_engine(connection_str) drop_database(engine.url) def create_schema(connection_str): + """ + Create schema for the database. + :param connection_str: + :return: None + """ engine = create_engine(connection_str) # , echo=True) Base.metadata.create_all(engine) - # logging.info(Base.metadata.tables) engine.dispose() diff --git a/parser/database/guid.py b/parser/database/guid.py deleted file mode 100644 index ddcce63..0000000 --- a/parser/database/guid.py +++ /dev/null @@ -1,39 +0,0 @@ -from sqlalchemy.types import TypeDecorator, CHAR -from sqlalchemy.dialects.postgresql import UUID -import uuid - - -class GUID(TypeDecorator): - """Platform-independent GUID type. - - Uses PostgreSQL's UUID type, otherwise uses - CHAR(32), storing as stringified hex values. 
- """ - impl = CHAR - cache_ok = True - - def load_dialect_impl(self, dialect): - if dialect.name == 'postgresql': - return dialect.type_descriptor(UUID()) - else: - return dialect.type_descriptor(CHAR(32)) - - def process_bind_param(self, value, dialect): - if value is None: - return value - elif dialect.name == 'postgresql': - return str(value) - else: - if not isinstance(value, uuid.UUID): - return "%.32x" % uuid.UUID(value).int - else: - # hexstring - return "%.32x" % value.int - - def process_result_value(self, value, dialect): - if value is None: - return value - else: - if not isinstance(value, uuid.UUID): - value = uuid.UUID(value) - return value diff --git a/parser/peaklistReader/PeakListWrapper.py b/parser/peaklistReader/PeakListWrapper.py index c505562..fb46085 100644 --- a/parser/peaklistReader/PeakListWrapper.py +++ b/parser/peaklistReader/PeakListWrapper.py @@ -1,3 +1,6 @@ +""" +PeakListWrapper.py +""" import ntpath import zipfile import re @@ -9,20 +12,26 @@ import io import tarfile - +#todo -check error handling class PeakListParseError(Exception): + """raised if error reading peaklist, invalid spectrum id or spectrum not found in peaklist file.""" pass class SpectrumIdFormatError(Exception): + """raised if the spectrum id format is not supported by the reader.""" pass class ScanNotFoundException(Exception): + """raised if the scan is not found in the mzML file.""" pass class Spectrum: + """ + A class to represent a spectrum. + """ def __init__(self, precursor, mz_array, int_array, rt=np.nan): """ Initialise a Spectrum object. @@ -45,6 +54,9 @@ def __init__(self, precursor, mz_array, int_array, rt=np.nan): class PeakListWrapper: + """ + A class to wrap peak list files and provide an interface to access the spectra. + """ def __init__(self, pl_path, file_format_accession, spectrum_id_format_accession): self.file_format_accession = file_format_accession self.spectrum_id_format_accession = spectrum_id_format_accession @@ -71,16 +83,31 @@ def __getitem__(self, spec_id): return self.reader[spec_id] def is_mgf(self): + """ + Check if the peak list is in MGF format. + :return: bbol + """ return self.file_format_accession == 'MS:1001062' def is_mzml(self): + """ + Check if the peak list is in mzML format. + :return: bool + """ return self.file_format_accession == 'MS:1000584' def is_ms2(self): + """ + Check if the peak list is in MS2 format. + :return: bool + """ return self.file_format_accession == 'MS:1001466' @staticmethod def extract_gz(in_file): + """ + Extract gzipped file. + """ if in_file.endswith('.gz'): in_f = gzip.open(in_file, 'rb') in_file = in_file.replace(".gz", "") @@ -101,7 +128,7 @@ def unzip_peak_lists(zip_file, out_path='.'): :param zip_file: path to archive to unzip :param out_path: where to extract the files - :return: resulting folder + :return: path to resulting folder """ if zip_file.endswith(".zip"): zip_ref = zipfile.ZipFile(zip_file, 'r') diff --git a/parser/peaklistReader/__init__.py b/parser/peaklistReader/__init__.py index e69de29..bd3906c 100644 --- a/parser/peaklistReader/__init__.py +++ b/parser/peaklistReader/__init__.py @@ -0,0 +1 @@ +"""peaklistReader module""" \ No newline at end of file diff --git a/parser/process_dataset.py b/parser/process_dataset.py index de41f1e..8a522ca 100644 --- a/parser/process_dataset.py +++ b/parser/process_dataset.py @@ -108,17 +108,11 @@ def validate(validate_arg, tmpdir): This includes checking that Seq elements are present for target proteins, even though omitting them is technically valid. Prints out results. 
-    Parameters
-    ----------
-    validate_arg : str
+    :param validate_arg: str
         The path to the mzIdentML file or directory to be validated.
-    tmpdir : str
-        The temporary directory to use for validation - an Sqlite DB is created here if given,
-        otherwise an in-memory sqlite DB is used.
-
-    Returns
-    -------
-    None
+    :param tmpdir: str
+        The temporary directory to use for validation - an SQLite DB is created here.
+    :return: None
     """
     if os.path.isdir(validate_arg):
         print(f'Validating directory: {validate_arg}')
@@ -348,7 +342,7 @@ def convert_from_ftp(ftp_url, temp_dir, project_identifier, writer_method, dontd
             ftp = get_ftp_login(ftp_ip)
             try:
                 ftp.cwd(urlparse(ftp_url).path)
-                ftp.retrbinary(f"RETR {f}", open(os.path.join(path, f), 'wb').write)
+                ftp.retrbinary(f"RETR {f}", open(os.path.join(str(path), f), 'wb').write)
                 ftp.quit()
             except ftplib.error_perm as e:
                 ftp.quit()
diff --git a/parser/schema_validate.py b/parser/schema_validate.py
index d0331c0..7bead20 100644
--- a/parser/schema_validate.py
+++ b/parser/schema_validate.py
@@ -1,9 +1,14 @@
+"""schema_validate.py - Validate an mzIdentML file against the 1.2.0 or 1.3.0 schema."""
 import importlib
-import os
 from lxml import etree
 
 
 def schema_validate(xml_file):
+    """
+    Validate an mzIdentML file against the 1.2.0 or 1.3.0 schema.
+    :param xml_file: Path to the mzIdentML file.
+    :return: True if the XML is valid, False otherwise.
+    """
     # Parse the XML file
     with open(xml_file, 'r') as xml:
         xml_doc = etree.parse(xml)
@@ -44,8 +49,8 @@ def schema_validate(xml_file):
     # # read from scehma directory
     # schema_file = os.path.join(current_directory, '..', 'schema', schema_fname)
     # # Parse the XSD file
-    with open(schema_file, 'r') as schema_file:
-        schema_root = etree.XML(schema_file.read())
+    with open(schema_file, 'r') as schema_file_stream:
+        schema_root = etree.XML(schema_file_stream.read())
     schema = etree.XMLSchema(schema_root)
 
     # Validate XML against the schema