diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 88088f66..2d9e9106 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -120,7 +120,7 @@ jobs:
       shell: bash -l {0}
       run: |
         cd testingDataset
-        ppanggolin panrgp --anno genomes.gbff.list --cluster clusters.tsv --output readclusterpang --cpu $NUM_CPUS
+        ppanggolin panrgp --anno genomes.gbff.list --cluster clusters.tsv --output readclusterpang  --cpu $NUM_CPUS 
         ppanggolin annotate --anno genomes.gbff.list --output readclusters --cpu $NUM_CPUS
         ppanggolin cluster --clusters clusters.tsv -p readclusters/pangenome.h5 --cpu $NUM_CPUS
         ppanggolin msa --pangenome readclusterpang/pangenome.h5 --partition persistent --phylo -o readclusterpang/msa/ -f --cpu $NUM_CPUS
diff --git a/docs/user/PangenomeAnalyses/pangenomeCluster.md b/docs/user/PangenomeAnalyses/pangenomeCluster.md
index eca6a630..53c118ae 100644
--- a/docs/user/PangenomeAnalyses/pangenomeCluster.md
+++ b/docs/user/PangenomeAnalyses/pangenomeCluster.md
@@ -54,6 +54,11 @@ You can do this from the command line:
 
 An example of what clusters.tsv should look like is provided [here](https://github.com/labgem/PPanGGOLiN/blob/master/testingDataset/clusters.tsv)
 
+```{note}
+When you provide your clustering, *PPanGGOLiN* will translate the representative gene sequence of each family and write it in the HDF5 file.
+```
+
+
 
 ### Defragmentation
 
diff --git a/ppanggolin/align/alignOnPang.py b/ppanggolin/align/alignOnPang.py
index a99ba3a4..eee073b8 100644
--- a/ppanggolin/align/alignOnPang.py
+++ b/ppanggolin/align/alignOnPang.py
@@ -27,7 +27,7 @@
 
 def align_seq_to_pang(target_seq_file: Union[Path, Iterable[Path]], query_seq_files: Union[Path, Iterable[Path]],
                       tmpdir: Path, cpu: int = 1, no_defrag: bool = False, identity: float = 0.8, coverage: float = 0.8,
-                      is_query_nt: bool = False, is_query_slf: bool = False, is_target_nt: bool = False,
+                      query_type: str = "unknow", is_query_slf: bool = False, target_type: str = "unknow",
                       is_target_slf: bool = False, translation_table: int = None) -> Path:
     """
     Align fasta sequence to pangenome sequences. 
@@ -39,34 +39,36 @@ def align_seq_to_pang(target_seq_file: Union[Path, Iterable[Path]], query_seq_fi
     :param no_defrag: Do not apply defragmentation
     :param identity: minimal identity threshold for the alignment
     :param coverage: minimal identity threshold for the alignment
-    :param is_query_nt: Is the sequence file (query) are nucleotide sequences. If True, sequences are translated by mmseqs
+    :param query_type: Sequences type of the file (query). [nucleotide, protein, unknow]
     :param is_query_slf: Is the sequence file (query) with single line fasta. If True, MMSeqs2 database will be with soft link
-    :param is_target_nt: Is the sequences of pangenome (target) are nucleotide sequences. If True, sequences are translated by mmseqs
+    :param target_type: Sequences type of pangenome (target). [nucleotide, aminoacid, protein]
     :param is_target_slf: Is the sequences of pangenome (target) with single line fasta. If True, MMSeqs2 database will be with soft link
     :param translation_table: Translation table to use, if sequences are nucleotide and need to be translated.
 
     :return: Alignment result file
     """
 
-    if is_target_nt:
+    if target_type == "nucleotide":
         logging.getLogger("PPanGGOLiN").debug("Target sequences will be translated by mmseqs with "
                                               f"translation table {translation_table}")
         with create_tmpdir(tmpdir, basename="target_db", keep_tmp=True) as target_db_dir:
             #  Keep is set as true because whether tmpdir is deleted or not target_db_dir will be the same
             target_db = translate_genes(target_seq_file, target_db_dir, cpu, is_target_slf, translation_table)
     else:
+        db_type = 1 if target_type == "protein" else 0
         target_db = create_mmseqs_db([target_seq_file] if isinstance(target_seq_file, Path) else target_seq_file,
-                                     'target_db', tmpdir, db_mode=1 if is_target_slf else 0, db_type=1)
+                                     'target_db', tmpdir, db_mode=1 if is_target_slf else 0, db_type=db_type)
 
-    if is_query_nt:
+    if query_type == "nucleotide":
         logging.getLogger("PPanGGOLiN").debug("Query sequences will be translated by mmseqs "
                                               f"with translation table {translation_table}")
         with create_tmpdir(tmpdir, basename="query_db", keep_tmp=True) as query_db_dir:
             #  Keep is set as true because whether tmpdir is deleted or not target_db_dir will be the same
             query_db = translate_genes(query_seq_files, query_db_dir, cpu, is_query_slf, translation_table)
     else:
+        db_type = 1 if query_type == "protein" else 0
         query_db = create_mmseqs_db([query_seq_files] if isinstance(query_seq_files, Path) else query_seq_files,
-                                    'query_db', tmpdir, db_mode=1 if is_query_slf else 0, db_type=1)
+                                    'query_db', tmpdir, db_mode=1 if is_query_slf else 0, db_type=db_type)
 
     cov_mode = "2"  # coverage of query
     if no_defrag:
@@ -390,7 +392,7 @@ def get_seq_info(seq_to_pang: dict, pangenome: Pangenome, output: Path, draw_rel
 
 
 def get_input_seq_to_family_with_rep(pangenome: Pangenome, sequence_files: Union[Path, Iterable[Path]], output: Path,
-                                     tmpdir: Path, is_input_seq_nt: bool, is_input_slf: bool = False, cpu: int = 1,
+                                     tmpdir: Path, input_type: str = "unknow", is_input_slf: bool = False, cpu: int = 1,
                                      no_defrag: bool = False, identity: float = 0.8, coverage: float = 0.8,
                                      translation_table: int = 11, disable_bar: bool = False
                                      ) -> Tuple[Path, Dict[str, GeneFamily]]:
@@ -404,7 +406,7 @@ def get_input_seq_to_family_with_rep(pangenome: Pangenome, sequence_files: Union
     :param sequence_files: Iterable of paths of FASTA files containing input sequences to align.
     :param output: Path to the output directory where alignment results will be stored.
     :param tmpdir: Temporary directory for intermediate files.
-    :param is_input_seq_nt: Is input sequence file nucleotide sequences.
+    :param input_type: Type of input sequence file. [nucleotide, aminoacid, unknow]
     :param is_input_slf: Is the sequence file with single line fasta. If True, MMSeqs2 database will be with soft link
     :param cpu: Number of CPU cores to use for the alignment (default: 1).
     :param no_defrag: If True, the defragmentation workflow is skipped (default: False).
@@ -421,10 +423,10 @@ def get_input_seq_to_family_with_rep(pangenome: Pangenome, sequence_files: Union
     logging.getLogger("PPanGGOLiN").debug(f'Write gene family sequences in {pangenome_sequences.absolute()}')
     write_gene_fam_sequences(pangenome, pangenome_sequences, add="ppanggolin_", disable_bar=disable_bar)
 
-    align_file = align_seq_to_pang(target_seq_file=pangenome_sequences, query_seq_files=sequence_files,
-                                   tmpdir=tmpdir, cpu=cpu, no_defrag=no_defrag, identity=identity, coverage=coverage,
-                                   is_query_nt=is_input_seq_nt, is_query_slf=is_input_slf,
-                                   is_target_nt=False, is_target_slf=True, translation_table=translation_table)
+    align_file = align_seq_to_pang(target_seq_file=pangenome_sequences, query_seq_files=sequence_files, tmpdir=tmpdir,
+                                   cpu=cpu, no_defrag=no_defrag, identity=identity, coverage=coverage,
+                                   query_type=input_type, is_query_slf=is_input_slf, is_target_slf=True,
+                                   target_type="protein", translation_table=translation_table)
 
     seq2pang, align_file = map_input_gene_to_family_rep_aln(align_file, output, pangenome)
 
@@ -432,7 +434,7 @@ def get_input_seq_to_family_with_rep(pangenome: Pangenome, sequence_files: Union
 
 
 def get_input_seq_to_family_with_all(pangenome: Pangenome, sequence_files:  Union[Path, Iterable[Path]], output: Path,
-                                     tmpdir: Path, is_input_seq_nt: bool, is_input_slf: bool = False, cpu: int = 1,
+                                     tmpdir: Path, input_type: str = "unknow", is_input_slf: bool = False, cpu: int = 1,
                                      no_defrag: bool = False, identity: float = 0.8, coverage: float = 0.8,
                                      translation_table: int = 11, disable_bar: bool = False
                                      ) -> Tuple[Path, Dict[str, GeneFamily]]:
@@ -446,7 +448,7 @@ def get_input_seq_to_family_with_all(pangenome: Pangenome, sequence_files:  Unio
     :param sequence_files: Iterable of paths of FASTA files containing input sequences to align.
     :param output: Path to the output directory where alignment results will be stored.
     :param tmpdir: Temporary directory for intermediate files.
-    :param is_input_seq_nt: Is input sequence file nucleotide sequences.
+    :param input_type: Sequences type of the file (query). [nucleotide, protein, unknow]
     :param is_input_slf: Is the sequence file with single line fasta. If True, MMSeqs2 database will be with soft link
     :param cpu: Number of CPU cores to use for the alignment (default: 1).
     :param no_defrag: If True, the defragmentation workflow is skipped (default: False).
@@ -464,8 +466,8 @@ def get_input_seq_to_family_with_all(pangenome: Pangenome, sequence_files:  Unio
 
     align_file = align_seq_to_pang(target_seq_file=pangenome_sequences, query_seq_files=sequence_files, tmpdir=tmpdir,
                                    cpu=cpu, no_defrag=no_defrag, identity=identity, coverage=coverage,
-                                   is_query_nt=is_input_seq_nt, is_query_slf=is_input_slf, is_target_nt=True,
-                                   is_target_slf=True, translation_table=translation_table)
+                                   query_type=input_type, is_query_slf=is_input_slf, is_target_slf=True,
+                                   target_type="nucleotide", translation_table=translation_table)
 
     seq2pang, align_file = map_input_gene_to_family_all_aln(align_file, output, pangenome)
 
@@ -516,19 +518,19 @@ def align(pangenome: Pangenome, sequence_file: Path, output: Path, identity: flo
         seq_set, is_nucleotide, single_line_fasta = get_seq_ids(seqFileObj)
 
     with create_tmpdir(main_dir=tmpdir, basename="align_input_seq_tmpdir", keep_tmp=keep_tmp) as new_tmpdir:
-
+        input_type = "nucleotide" if is_nucleotide else "unknow"
         if use_representatives:
             align_file, seq2pang = get_input_seq_to_family_with_rep(pangenome, sequence_file, output, new_tmpdir,
-                                                                    is_input_seq_nt=is_nucleotide,
-                                                                    is_input_slf=single_line_fasta,
-                                                                    cpu=cpu, no_defrag=no_defrag, identity=identity,
+                                                                    input_type=input_type,
+                                                                    is_input_slf=single_line_fasta, cpu=cpu,
+                                                                    no_defrag=no_defrag, identity=identity,
                                                                     coverage=coverage,
                                                                     translation_table=translation_table,
                                                                     disable_bar=disable_bar)
         else:
             align_file, seq2pang = get_input_seq_to_family_with_all(pangenome=pangenome, sequence_files=sequence_file,
                                                                     output=output, tmpdir=new_tmpdir,
-                                                                    is_input_seq_nt=is_nucleotide,
+                                                                    input_type=input_type,
                                                                     is_input_slf=single_line_fasta,
                                                                     cpu=cpu, no_defrag=no_defrag, identity=identity,
                                                                     coverage=coverage,
diff --git a/ppanggolin/annotate/annotate.py b/ppanggolin/annotate/annotate.py
index 6bf37a12..659954c9 100644
--- a/ppanggolin/annotate/annotate.py
+++ b/ppanggolin/annotate/annotate.py
@@ -510,13 +510,11 @@ def read_org_gbff(organism_name: str, gbff_file_path: Path, circular_contigs: Li
                 rna_counter += 1
 
     genome_metadata, contig_to_uniq_metadata = combine_contigs_metadata(contig_to_metadata)
-    
-    genome_meta_obj = Metadata(source='annotation_file', **genome_metadata)
-    organism.add_metadata(source="annotation_file", metadata=genome_meta_obj)
+
+    organism.add_metadata(metadata=Metadata(source='annotation_file', **genome_metadata))
 
     for contig, metadata_dict in contig_to_uniq_metadata.items():
-        contigs_meta_obj = Metadata(source='annotation_file', **metadata_dict)
-        contig.add_metadata(source="annotation_file", metadata=contigs_meta_obj)
+        contig.add_metadata(Metadata(source='annotation_file', **metadata_dict))
 
     if used_transl_table_arg:
         logging.getLogger("PPanGGOLiN").info(
@@ -791,8 +789,7 @@ def add_metadata_from_gff_file(contig_name_to_region_info: Dict[str, str], org:
     if len(contig_name_to_region_info) == org.number_of_contigs:
         genome_metadata, contig_to_uniq_metadata = combine_contigs_metadata(contig_name_to_region_info)
         if genome_metadata:
-            genome_meta_obj = Metadata(source='annotation_file', **genome_metadata)
-            org.add_metadata(source="annotation_file", metadata=genome_meta_obj)
+            org.add_metadata(Metadata(source='annotation_file', **genome_metadata))
     else:
         logging.getLogger("PPanGGOLiN").warning(
             f"Inconsistent data in GFF file {gff_file_path}: "
@@ -804,9 +801,8 @@ def add_metadata_from_gff_file(contig_name_to_region_info: Dict[str, str], org:
             contig = org.get(contig_name)
         except KeyError:
             raise ValueError(f"Contig '{contig_name}' does not exist in the genome object created from GFF file {gff_file_path}.")
-        
-        contigs_meta_obj = Metadata(source='annotation_file', **metadata_dict)
-        contig.add_metadata(source="annotation_file", metadata=contigs_meta_obj)
+
+        contig.add_metadata(Metadata(source='annotation_file', **metadata_dict))
 
 
 def check_and_add_extra_gene_part(gene: Gene, new_gene_info: Dict, max_separation: int = 10):
diff --git a/ppanggolin/cluster/cluster.py b/ppanggolin/cluster/cluster.py
index aed0a4dc..ab1046b9 100644
--- a/ppanggolin/cluster/cluster.py
+++ b/ppanggolin/cluster/cluster.py
@@ -19,7 +19,7 @@
 from ppanggolin.pangenome import Pangenome
 from ppanggolin.genome import Gene
 from ppanggolin.geneFamily import GeneFamily
-from ppanggolin.utils import read_compressed_or_not, restricted_float, run_subprocess, create_tmpdir, mk_outdir
+from ppanggolin.utils import read_compressed_or_not, restricted_float, run_subprocess, create_tmpdir
 from ppanggolin.formats.writeBinaries import write_pangenome, erase_pangenome
 from ppanggolin.formats.readBinaries import check_pangenome_info, write_gene_sequences_from_pangenome_file
 from ppanggolin.formats.writeSequences import write_gene_sequences_from_annotations, translate_genes, create_mmseqs_db
@@ -61,7 +61,8 @@ def check_pangenome_for_clustering(pangenome: Pangenome, sequences: Path, force:
     elif pangenome.status["geneSequences"] == "inFile":
         logging.getLogger("PPanGGOLiN").debug("Write sequences from pangenome file")
         write_gene_sequences_from_pangenome_file(pangenome.file, sequences, add="ppanggolin_",
-                                                 compress=False, disable_bar=disable_bar)  # write CDS sequences to the tmpFile
+                                                 compress=False,
+                                                 disable_bar=disable_bar)  # write CDS sequences to the tmpFile
     else:
         raise Exception("The pangenome does not include gene sequences, thus it is impossible to cluster "
                         "the genes in gene families. Either provide clustering results (see --clusters), "
@@ -286,7 +287,7 @@ def clustering(pangenome: Pangenome, tmpdir: Path, cpu: int = 1, defrag: bool =
     date = time.strftime("_%Y-%m-%d_%H-%M-%S", time.localtime())
     dir_name = f'clustering_tmpdir_{date}_PID{os.getpid()}'
     with create_tmpdir(tmpdir, basename=dir_name, keep_tmp=keep_tmp_files) as tmp_path:
-        sequence_path = tmp_path/'nucleotide_sequences.fna'
+        sequence_path = tmp_path / 'nucleotide_sequences.fna'
         check_pangenome_for_clustering(pangenome, sequence_path, force, disable_bar=disable_bar)
         logging.getLogger("PPanGGOLiN").info("Clustering all of the genes sequences...")
         rep, tsv = first_clustering(sequence_path, tmp_path, cpu, code, coverage, identity, mode)
@@ -356,8 +357,32 @@ def infer_singletons(pangenome: Pangenome):
     logging.getLogger("PPanGGOLiN").info(f"Inferred {singleton_counter} singleton families")
 
 
-def read_clustering(pangenome: Pangenome, families_tsv_file: Path, infer_singleton: bool = False, force: bool = False,
-                    disable_bar: bool = False):
+def get_family_representative_sequences(pangenome: Pangenome, code: int = 11, cpu: int = 1,
+                                        tmpdir: Path = None, keep_tmp: bool = False):
+    tmpdir = Path(tempfile.gettempdir()) if tmpdir is None else tmpdir
+    with create_tmpdir(tmpdir, "get_proteins_sequences", keep_tmp) as tmp:
+        repres_path = tmp / "representative.fna"
+        with open(repres_path, "w") as repres_seq:
+            for family in pangenome.gene_families:
+                repres_seq.write(f">{family.name}\n")
+                repres_seq.write(f"{family.representative.dna}\n")
+        translate_db = translate_genes(sequences=repres_path, tmpdir=tmp, cpu=cpu,
+                                       is_single_line_fasta=True, code=code)
+        outpath = tmp / "representative_protein_genes.fna"
+        cmd = list(map(str, ["mmseqs", "convert2fasta", translate_db, outpath]))
+        run_subprocess(cmd, msg="MMSeqs convert2fasta failed with the following error:\n")
+        with open(outpath, "r") as repres_prot:
+            lines = repres_prot.readlines()
+            while len(lines) > 0:
+                family_name = lines.pop(0).strip()[1:]
+                family_seq = lines.pop(0).strip()
+                family = pangenome.get_gene_family(family_name)
+                family.add_sequence(family_seq)
+
+
+def read_clustering(pangenome: Pangenome, families_tsv_file: Path, infer_singleton: bool = False,
+                    code: int = 11, cpu: int = 1, tmpdir: Path = None, keep_tmp: bool = False,
+                    force: bool = False, disable_bar: bool = False):
     """
     Get the pangenome information, the gene families and the genes with an associated gene family.
     Reads a families tsv file from mmseqs2 output and adds the gene families and the genes to the pangenome.
@@ -365,11 +390,15 @@ def read_clustering(pangenome: Pangenome, families_tsv_file: Path, infer_singlet
     :param pangenome: Input Pangenome
     :param families_tsv_file: MMseqs2 clustering results
     :param infer_singleton: creates a new family for each gene with no associated family
+    :param code: Genetic code used for sequence translation.
+    :param cpu: Number of CPU cores to use for clustering.
+    :param tmpdir: Path to a temporary directory for intermediate files.
+    :param keep_tmp: Keep temporary files (useful for debugging).
     :param force: force to write in the pangenome
     :param disable_bar: Allow to disable progress bar
     """
     check_pangenome_former_clustering(pangenome, force)
-    check_pangenome_info(pangenome, need_annotations=True, disable_bar=disable_bar)
+    check_pangenome_info(pangenome, need_annotations=True, need_gene_sequences=True, disable_bar=disable_bar)
 
     logging.getLogger("PPanGGOLiN").info(f"Reading {families_tsv_file.name} the gene families file ...")
     filesize = os.stat(families_tsv_file).st_size
@@ -421,10 +450,12 @@ def read_clustering(pangenome: Pangenome, families_tsv_file: Path, infer_singlet
                     f"You can either update your cluster file to ensure each gene has a cluster assignment, "
                     f"or use the '--infer_singletons' option to automatically infer a cluster for each non-clustered gene."
                 )
+    get_family_representative_sequences(pangenome, code, cpu, tmpdir, keep_tmp)
 
     pangenome.status["genesClustered"] = "Computed"
     if frag:  # if there was fragment information in the file.
         pangenome.status["defragmented"] = "Computed"
+    pangenome.status["geneFamilySequences"] = "Computed"
     pangenome.parameters["cluster"] = {}
     pangenome.parameters["cluster"]["# read_clustering_from_file"] = True
     pangenome.parameters["cluster"]["infer_singletons"] = infer_singleton
@@ -450,7 +481,8 @@ def launch(args: argparse.Namespace):
         if None in [args.tmpdir, args.cpu, args.no_defrag, args.translation_table,
                     args.coverage, args.identity, args.mode]:
             logging.getLogger("PPanGGOLiN").warning("You are using an option compatible only with clustering creation.")
-        read_clustering(pangenome, args.clusters, args.infer_singletons, args.force, disable_bar=args.disable_prog_bar)
+        read_clustering(pangenome, args.clusters, args.infer_singletons, args.translation_table,
+                        args.cpu, args.tmpdir, args.keep_tmp, args.force, disable_bar=args.disable_prog_bar)
         logging.getLogger("PPanGGOLiN").info("Done reading the cluster file")
     write_pangenome(pangenome, pangenome.file, args.force, disable_bar=args.disable_prog_bar)
 
@@ -488,11 +520,6 @@ def parser_clust(parser: argparse.ArgumentParser):
     clust.add_argument('--no_defrag', required=False, default=False, action="store_true",
                        help="DO NOT Use the defragmentation strategy to link potential fragments "
                             "with their original gene family.")
-    clust.add_argument("--translation_table", required=False, default="11",
-                       help="Translation table (genetic code) to use.")
-    # clust.add_argument("--compress")
-
-    clust.add_argument("-c", "--cpu", required=False, default=1, type=int, help="Number of available cpus")
 
     read = parser.add_argument_group(title="Read clustering arguments")
     read.add_argument('--clusters', required=False, type=Path,
@@ -502,7 +529,10 @@ def parser_clust(parser: argparse.ArgumentParser):
                       help="When reading a clustering result with --clusters, if a gene is not in the provided file"
                            " it will be placed in a cluster where the gene is the only member.")
     optional = parser.add_argument_group(title="Optional arguments")
-    optional.add_argument("--tmpdir", required=False, type=str, default=Path(tempfile.gettempdir()),
+    optional.add_argument("--translation_table", required=False, default="11",
+                          help="Translation table (genetic code) to use.")
+    optional.add_argument("-c", "--cpu", required=False, default=1, type=int, help="Number of available cpus")
+    optional.add_argument("--tmpdir", required=False, type=Path, default=Path(tempfile.gettempdir()),
                           help="directory for storing temporary files")
     optional.add_argument("--keep_tmp", required=False, default=False, action="store_true",
                           help="Keeping temporary files (useful for debugging).")
diff --git a/ppanggolin/context/searchGeneContext.py b/ppanggolin/context/searchGeneContext.py
index 508203fc..512d12a6 100644
--- a/ppanggolin/context/searchGeneContext.py
+++ b/ppanggolin/context/searchGeneContext.py
@@ -74,18 +74,17 @@ def search_gene_context_in_pangenome(pangenome: Pangenome, output: Path, tmpdir:
         logging.debug(f"Input sequences are {'nucleotide' if is_nucleotide else 'protein'} sequences")
 
         with create_tmpdir(main_dir=tmpdir, basename="align_input_seq_tmpdir", keep_tmp=keep_tmp) as new_tmpdir:
-
+            input_type = "nucleotide" if is_nucleotide else "unknow"
             if use_representatives:
                 _, seqid2fam = get_input_seq_to_family_with_rep(pangenome, sequence_file, output, new_tmpdir,
-                                                                is_input_seq_nt=is_nucleotide, is_input_slf=is_slf,
-                                                                cpu=cpu, no_defrag=no_defrag,
-                                                                identity=identity, coverage=coverage,
-                                                                translation_table=translation_table,
+                                                                input_type=input_type, is_input_slf=is_slf, cpu=cpu,
+                                                                no_defrag=no_defrag, identity=identity,
+                                                                coverage=coverage, translation_table=translation_table,
                                                                 disable_bar=disable_bar)
             else:
                 _, seqid2fam = get_input_seq_to_family_with_all(pangenome=pangenome, sequence_files=sequence_file,
                                                                 output=output, tmpdir=new_tmpdir,
-                                                                is_input_seq_nt=is_nucleotide, is_input_slf=is_slf,
+                                                                input_type=input_type, is_input_slf=is_slf,
                                                                 cpu=cpu, no_defrag=no_defrag,
                                                                 identity=identity, coverage=coverage,
                                                                 translation_table=translation_table,
diff --git a/ppanggolin/formats/readBinaries.py b/ppanggolin/formats/readBinaries.py
index 4c90be2c..85c1e7ac 100644
--- a/ppanggolin/formats/readBinaries.py
+++ b/ppanggolin/formats/readBinaries.py
@@ -682,7 +682,11 @@ def read_metadata(pangenome: Pangenome, h5f: tables.File, metatype: str,
         source_table = metadata_group._f_get_child(source)
         for row in tqdm(read_chunks(source_table), total=source_table.nrows, unit='metadata', disable=disable_bar):
             meta_dict = {'source': source}
-            identifier = row["ID"].decode() if isinstance(row["ID"], bytes) else int(row["ID"])
+            try:
+                meta_id = row["metadata_id"]
+            except KeyError:
+                meta_id = None
+            identifier = row["ID"].decode("utf-8") if isinstance(row["ID"], bytes) else int(row["ID"])
             # else:
             #     identifier = row["name"].decode()
             if metatype == "families":
@@ -706,8 +710,7 @@ def read_metadata(pangenome: Pangenome, h5f: tables.File, metatype: str,
             for field in row.dtype.names:
                 if field not in ["ID", "name"]:
                     meta_dict[field] = row[field].decode() if isinstance(row[field], bytes) else row[field]
-            meta = Metadata(**meta_dict)
-            element.add_metadata(source=source, metadata=meta)
+            element.add_metadata(metadata=Metadata(**meta_dict), metadata_id=meta_id)
     pangenome.status["metadata"][metatype] = "Loaded"
 
 
diff --git a/ppanggolin/formats/writeFlatMetadata.py b/ppanggolin/formats/writeFlatMetadata.py
index f4b91697..cfd58d12 100644
--- a/ppanggolin/formats/writeFlatMetadata.py
+++ b/ppanggolin/formats/writeFlatMetadata.py
@@ -89,7 +89,7 @@ def write_flat_metadata_files(pangenome: Pangenome, output: Path,
 
         for element in getattr(pangenome, element_type_to_attribute[element_type]):
             for source in set(element.sources) & set(sources):
-                for metadata in element.get_metadata_by_source(source):
+                for metadata in element.get_metadata_by_source(source).values():
                     metadata_dict = metadata.to_dict()
                     if element_type == "spots":
                         metadata_dict[element_type] = f"spot_{element.ID}"
diff --git a/ppanggolin/formats/writeFlatPangenome.py b/ppanggolin/formats/writeFlatPangenome.py
index a7d29b6e..cabdfbd8 100644
--- a/ppanggolin/formats/writeFlatPangenome.py
+++ b/ppanggolin/formats/writeFlatPangenome.py
@@ -976,11 +976,6 @@ def write_spot_modules(output: Path, compress: bool = False):
     """
     logging.getLogger("PPanGGOLiN").info("Writing modules to spot associations...")
 
-    fam2mod = {}
-    for mod in pan.modules:
-        for fam in mod.families:
-            fam2mod[fam] = mod
-
     with write_compressed_or_not(output / "modules_spots.tsv", compress) as fout:
         fout.write("module_id\tspot_id\n")
 
@@ -988,14 +983,14 @@ def write_spot_modules(output: Path, compress: bool = False):
             curr_mods = defaultdict(set)
             for rgp in spot.get_uniq_content():
                 for fam in rgp.families:
-                    mod = fam2mod.get(fam)
-                    if mod is not None:
-                        curr_mods[mod].add(fam)
+                    if fam.module is not None:
+                        curr_mods[fam.module].add(fam)
+
+            for module, mod_families_in_spot in curr_mods.items():
 
-            for mod in curr_mods:
-                if curr_mods[mod] == mod.families:
+                if mod_families_in_spot == set(module.families):
                     # if all the families in the module are found in the spot, write the association
-                    fout.write(f"module_{mod.ID}\tspot_{spot.ID}\n")
+                    fout.write(f"module_{module.ID}\tspot_{spot.ID}\n")
 
     logging.getLogger("PPanGGOLiN").info(
         f"Done writing module to spot associations to: {output.as_posix() + '/modules_spots.tsv'}")
@@ -1241,7 +1236,7 @@ def parser_flat(parser: argparse.ArgumentParser):
     optional.add_argument("--modules", required=False, action="store_true",
                           help="Write a tsv file listing functional modules and the families that belong to them")
     optional.add_argument("--spot_modules", required=False, action="store_true",
-                          help="writes 3 files comparing the presence of modules within spots")
+                          help="writes 2 files comparing the presence of modules within spots")
     
     optional.add_argument("--compress", required=False, action="store_true", help="Compress the files in .gz")
     optional.add_argument("-c", "--cpu", required=False, default=1, type=int, help="Number of available cpus")
diff --git a/ppanggolin/formats/writeMetadata.py b/ppanggolin/formats/writeMetadata.py
index e72cfec2..dbc95f78 100644
--- a/ppanggolin/formats/writeMetadata.py
+++ b/ppanggolin/formats/writeMetadata.py
@@ -5,7 +5,6 @@
 import logging
 from typing import Dict, List, Tuple, Union
 
-
 # installed libraries
 from tqdm import tqdm
 import numpy
@@ -58,7 +57,7 @@ def write_metadata_status(pangenome: Pangenome, h5f: tables.File, status_group:
     if metastatus["modules"] in ["Computed", "Loaded", "inFile"]:
         metadata_group._v_attrs.modules = True
         metasources_group._v_attrs.modules = metasources["modules"]
-        
+
     return True if any(metadata_group._v_attrs._f_list()) else False
 
 
@@ -99,12 +98,13 @@ def get_metadata_contig_len(select_ctg: List[Contig], source: str) -> Tuple[Dict
 
     :return: Maximum type and size of each element
     """
-    type_dict = {"ID": tables.Int64Col()}
+    type_dict = {"metadata_id": tables.Int64Col(),
+                 "ID": tables.Int64Col()}
     max_len_dict = {}
     expected_rows = 0
 
     for contig in select_ctg:
-        for metadata in contig.get_metadata_by_source(source):
+        for metadata in contig.get_metadata_by_source(source).values():
             for attr, value in ((k, v) for k, v in metadata.__dict__.items() if k != "source"):
                 if isinstance(value, bytes):
                     value = value.decode('UTF-8')
@@ -146,10 +146,13 @@ def write_metadata_contig(h5f: tables.File, source: str, select_contigs: List[Co
     source_table = h5f.create_table(metatype_group, source, desc_metadata(*meta_len[:-1]), expectedrows=meta_len[-1])
     meta_row = source_table.row
     for contig in tqdm(select_contigs, unit="contigs", desc=f'Source = {source}', disable=disable_bar):
-        for metadata in contig.get_metadata_by_source(source):
+        for meta_id, metadata in contig.get_metadata_by_source(source).items():
+            meta_row["metadata_id"] = meta_id
             for desc in source_table.colnames:
                 if desc == "ID":
                     meta_row[desc] = contig.ID
+                elif desc == "metadata_id":
+                    meta_row[desc] = meta_id
                 else:
                     if hasattr(metadata, desc):
                         value = metadata.__getattribute__(desc)
@@ -172,7 +175,7 @@ def get_metadata_len(select_elem: Union[List[Gene], List[Organism], List[GeneFam
 
     :return: Maximum type and size of each element
     """
-    type_dict = {}
+    type_dict = {"metadata_id": tables.Int64Col()}
     max_len_dict = {}
     expected_rows = 0
 
@@ -191,7 +194,7 @@ def get_metadata_len(select_elem: Union[List[Gene], List[Organism], List[GeneFam
         else:
             raise Exception("Unexpected attribute. A recent change could create this error."
                             " Please report the error on our github.")
-        for metadata in element.get_metadata_by_source(source):
+        for metadata in element.get_metadata_by_source(source).values():
             for attr, value in ((k, v) for k, v in metadata.__dict__.items() if k != "source"):
                 if isinstance(value, bytes):
                     value = value.decode('UTF-8')
@@ -233,17 +236,18 @@ def write_metadata_metatype(h5f: tables.File, source: str, metatype: str,
     """
     metatype_group = write_metadata_group(h5f, metatype)
     meta_len = get_metadata_len(select_elements, source)
-    # h5f.remove_node(metatype_group, source)
     source_table = h5f.create_table(metatype_group, source, desc_metadata(*meta_len[:-1]), expectedrows=meta_len[-1])
     meta_row = source_table.row
     for element in tqdm(select_elements, unit=metatype, desc=f'Source = {source}', disable=disable_bar):
-        for metadata in element.get_metadata_by_source(source):
+        for meta_id, metadata in element.get_metadata_by_source(source).items():
             for desc in source_table.colnames:
                 if desc == "ID":
                     if hasattr(element, 'name') and len(element.name) > 0:
                         meta_row[desc] = element.name
                     else:
                         meta_row[desc] = element.ID
+                elif desc == "metadata_id":
+                    meta_row[desc] = meta_id
                 else:
                     if hasattr(metadata, desc):
                         value = metadata.__getattribute__(desc)
@@ -301,55 +305,55 @@ def write_metadata(pangenome: Pangenome, h5f: tables.File, disable_bar: bool = F
     """
     if pangenome.status["metadata"]["families"] == "Computed":
         logging.getLogger("PPanGGOLiN").info("Writing gene families metadata in pangenome")
-        select_gf = list(pangenome.get_elem_by_sources(source=pangenome.status["metasources"]["families"][-1],
-                                                       metatype="families"))
+        select_gf = list(pangenome.get_elem_by_source(source=pangenome.status["metasources"]["families"][-1],
+                                                      metatype="families"))
         write_metadata_metatype(h5f, pangenome.status["metasources"]["families"][-1],
                                 "families", select_gf, disable_bar)
         pangenome.status["metadata"]["families"] = "Loaded"
 
     if pangenome.status["metadata"]["genomes"] == "Computed":
         logging.getLogger("PPanGGOLiN").info("Writing genomes metadata in pangenome")
-        select_genomes = list(pangenome.get_elem_by_sources(source=pangenome.status["metasources"]["genomes"][-1],
-                                                            metatype="genomes"))
+        select_genomes = list(pangenome.get_elem_by_source(source=pangenome.status["metasources"]["genomes"][-1],
+                                                           metatype="genomes"))
         write_metadata_metatype(h5f, pangenome.status["metasources"]["genomes"][-1],
                                 "genomes", select_genomes, disable_bar)
         pangenome.status["metadata"]["genomes"] = "Loaded"
 
     if pangenome.status["metadata"]["contigs"] == "Computed":
         logging.getLogger("PPanGGOLiN").info("Writing contigs metadata in pangenome")
-        select_contigs = list(pangenome.get_elem_by_sources(source=pangenome.status["metasources"]["contigs"][-1],
-                                                            metatype="contigs"))
+        select_contigs = list(pangenome.get_elem_by_source(source=pangenome.status["metasources"]["contigs"][-1],
+                                                           metatype="contigs"))
         write_metadata_contig(h5f, pangenome.status["metasources"]["contigs"][-1], select_contigs, disable_bar)
         pangenome.status["metadata"]["contigs"] = "Loaded"
 
     if pangenome.status["metadata"]["genes"] == "Computed":
         logging.getLogger("PPanGGOLiN").info("Writing genes metadata in pangenome")
-        select_genes = list(pangenome.get_elem_by_sources(source=pangenome.status["metasources"]["genes"][-1],
-                                                          metatype="genes"))
+        select_genes = list(pangenome.get_elem_by_source(source=pangenome.status["metasources"]["genes"][-1],
+                                                         metatype="genes"))
         write_metadata_metatype(h5f, pangenome.status["metasources"]["genes"][-1],
                                 "genes", select_genes, disable_bar)
         pangenome.status["metadata"]["genes"] = "Loaded"
 
     if pangenome.status["metadata"]["RGPs"] == "Computed":
         logging.getLogger("PPanGGOLiN").info("Writing RGPs metadata in pangenome")
-        select_rgps = list(pangenome.get_elem_by_sources(source=pangenome.status["metasources"]["RGPs"][-1],
-                                                         metatype="RGPs"))
+        select_rgps = list(pangenome.get_elem_by_source(source=pangenome.status["metasources"]["RGPs"][-1],
+                                                        metatype="RGPs"))
         write_metadata_metatype(h5f, pangenome.status["metasources"]["RGPs"][-1],
                                 "RGPs", select_rgps, disable_bar)
         pangenome.status["metadata"]["RGPs"] = "Loaded"
 
     if pangenome.status["metadata"]["spots"] == "Computed":
         logging.getLogger("PPanGGOLiN").info("Writing spots metadata in pangenome")
-        select_spots = list(pangenome.get_elem_by_sources(source=pangenome.status["metasources"]["spots"][-1],
-                                                          metatype="spots"))
+        select_spots = list(pangenome.get_elem_by_source(source=pangenome.status["metasources"]["spots"][-1],
+                                                         metatype="spots"))
         write_metadata_metatype(h5f, pangenome.status["metasources"]["spots"][-1],
                                 "spots", select_spots, disable_bar)
         pangenome.status["metadata"]["spots"] = "Loaded"
 
     if pangenome.status["metadata"]["modules"] == "Computed":
         logging.getLogger("PPanGGOLiN").info("Writing modules metadata in pangenome")
-        select_modules = list(pangenome.get_elem_by_sources(source=pangenome.status["metasources"]["modules"][-1],
-                                                            metatype="modules"))
+        select_modules = list(pangenome.get_elem_by_source(source=pangenome.status["metasources"]["modules"][-1],
+                                                           metatype="modules"))
         write_metadata_metatype(h5f, pangenome.status["metasources"]["modules"][-1],
                                 "modules", select_modules, disable_bar)
         pangenome.status["metadata"]["modules"] = "Loaded"
diff --git a/ppanggolin/formats/writeSequences.py b/ppanggolin/formats/writeSequences.py
index 41cc3e0d..8b238411 100644
--- a/ppanggolin/formats/writeSequences.py
+++ b/ppanggolin/formats/writeSequences.py
@@ -497,7 +497,7 @@ def write_regions_sequences(pangenome: Pangenome, output: Path, regions: str, fa
     for line in read_compressed_or_not(organisms_file):
         elements = [el.strip() for el in line.split("\t")]
         if len(elements) <= 1:
-            raise SyntaxError(f"No tabulation separator found in given --fasta or --anno file: '{organisms_file}'")
+            raise ValueError(f"No tabulation separator found in given --fasta or --anno file: '{organisms_file}'")
         org_dict[elements[0]] = Path(elements[1])
         if not org_dict[elements[0]].exists():  # Check tsv sanity test if it's not one it's the other
             org_dict[elements[0]] = organisms_file.parent.joinpath(org_dict[elements[0]])
diff --git a/ppanggolin/geneFamily.py b/ppanggolin/geneFamily.py
index b856fd6f..708a3dd6 100644
--- a/ppanggolin/geneFamily.py
+++ b/ppanggolin/geneFamily.py
@@ -188,6 +188,13 @@ def remove(self, identifier):
         del self[identifier]
 
 
+    @property
+    def representative(self) -> Gene:
+        """Get the representative gene of the family
+        :return: The representative gene of the family
+        """
+        return self.get(self.name)
+
     def contains_gene_id(self, identifier):
         """
         Check if the family contains already a gene id
diff --git a/ppanggolin/genome.py b/ppanggolin/genome.py
index 504197fe..87e2ef46 100644
--- a/ppanggolin/genome.py
+++ b/ppanggolin/genome.py
@@ -591,7 +591,7 @@ def get_genes(self, begin: int = 0, end: int = None, outrange_ok: bool = False)
             raise TypeError(f"Expected type int for 'begin' and 'end', "
                             f"but received types '{type(begin)}' and '{type(end)}'.")
 
-        if begin >= end:
+        if begin > end:
             raise ValueError("The 'begin' position must be less than the 'end' position.")
 
         if end > self._genes_position[-1].position:
@@ -603,7 +603,10 @@ def get_genes(self, begin: int = 0, end: int = None, outrange_ok: bool = False)
         if end == self._genes_position[-1].position:
             return self._genes_position[begin:]
         else:
-            return self._genes_position[begin: end]
+            if begin == end:
+                return self._genes_position[begin]
+            else:
+                return self._genes_position[begin: end]
 
     @property
     def number_of_genes(self) -> int:
diff --git a/ppanggolin/meta/meta.py b/ppanggolin/meta/meta.py
index 87d9d82c..cacf5534 100644
--- a/ppanggolin/meta/meta.py
+++ b/ppanggolin/meta/meta.py
@@ -135,8 +135,7 @@ def check_duplicate_contig_name():
             else:
                 logging.getLogger().debug(f"{metatype}: {row[metatype]} doesn't exist")
         else:
-            meta = Metadata(source=source, **{k: v for k, v in row.to_dict().items() if k != metatype})
-            element.add_metadata(source=source, metadata=meta)
+            element.add_metadata(Metadata(source=source, **{k: v for k, v in row.to_dict().items() if k != metatype}))
 
     pangenome.status["metadata"][metatype] = "Computed"
     pangenome.status["metasources"][metatype].append(source)
diff --git a/ppanggolin/metadata.py b/ppanggolin/metadata.py
index 2d6b5c73..b0021b64 100644
--- a/ppanggolin/metadata.py
+++ b/ppanggolin/metadata.py
@@ -76,7 +76,7 @@ def fields(self) -> List[str]:
         fields = list(self.__dict__)
         fields.remove("source")
         return fields
-    
+
     def to_dict(self) -> Dict[str, Any]:
         """
         Get metadata in dict format.
@@ -103,7 +103,13 @@ class MetaFeatures:
     def __init__(self):
         """Constructor method
         """
-        self._metadata_getter = defaultdict(list)
+        self._metadata_getter = defaultdict(dict)
+
+    @property
+    def number_of_metadata(self) -> int:
+        """Get the number of metadata associated to feature
+        """
+        return sum(len(meta_dict) for meta_dict in self._metadata_getter.values())
 
     @property
     def metadata(self) -> Generator[Metadata, None, None]:
@@ -112,8 +118,8 @@ def metadata(self) -> Generator[Metadata, None, None]:
         :return: Metadata from all sources
         """
 
-        for meta_list in self._metadata_getter.values():
-            for metadata in meta_list:
+        for meta_dict in self._metadata_getter.values():
+            for metadata in meta_dict.values():
                 yield metadata
 
     @property
@@ -147,21 +153,33 @@ def formatted_metadata_dict(self, separator: str = "|") -> Dict[str, str]:
 
         return {source_field: separator.join(values) for source_field, values in source_field_2_values.items()}
 
-    
-    def add_metadata(self, source, metadata):
+    def add_metadata(self, metadata: Metadata, metadata_id: int = None) -> None:
         """Add metadata to metadata getter
 
-        :param source: Name of the metadata source
         :param metadata: metadata value to add for the source
+        :param metadata_id: metadata identifier
 
         :raises AssertionError: Source or metadata is not with the correct type
         """
         assert isinstance(metadata, Metadata), f"Metadata is not with type Metadata but with {type(metadata)}"
-        assert isinstance(source, str), f"Source is not a string but with {type(source)}"
-
-        self._metadata_getter[source].append(metadata)
 
-    def get_metadata_by_source(self, source: str) -> Union[List[Metadata], None]:
+        # Metadata_id should not already exist because the metadata are added from scratch to a new source,
+        # or they are ridden
+        if metadata_id is None:
+            if self.has_source(metadata.source):
+                metadata_id = max([meta_id for meta_id in self._metadata_getter[metadata.source].keys()]) + 1
+            else:
+                metadata_id = 0
+
+        try:
+            self._metadata_getter[metadata.source][metadata_id]
+        except KeyError:
+            self._metadata_getter[metadata.source][metadata_id] = metadata
+        else:
+            raise KeyError(f"A metadata with ID {metadata_id} already exist "
+                           f"for source {metadata.source} in {str(self)}")
+
+    def get_metadata_by_source(self, source: str) -> Union[Dict[int, Metadata], None]:
         """Get all the metadata feature corresponding to the source
 
         :param source: Name of the source to get
@@ -200,16 +218,15 @@ def del_metadata_by_source(self, source: str):
 
     def del_metadata_by_attribute(self, **kwargs):
         """Remove a source from the feature
-
-        :param source: Name of the source to delete
         """
-        for source, metadata in self._metadata_getter.items():
+        for source, metadata_dict in self._metadata_getter.items():
             for attr, value in kwargs.items():
-                if hasattr(metadata, attr):
-                    # BUG If value is a list, the join block detection.
-                    # It would be better to keep a list and change in writing and reading metadata to join the list
-                    if getattr(metadata, attr, None) in value or getattr(metadata, attr, None) == value:
-                        self._metadata_getter[source].remove(metadata)
+                for meta_id, metadata in metadata_dict.items():
+                    if hasattr(metadata, attr):
+                        # BUG If value is a list, the join block detection.
+                        # It would be better to keep a list and change in writing and reading metadata to join the list
+                        if getattr(metadata, attr, None) in value or getattr(metadata, attr, None) == value:
+                            del self._metadata_getter[source][meta_id]
 
     def max_metadata_by_source(self) -> Tuple[str, int]:
         """Get the maximum number of metadata for one source
@@ -226,4 +243,13 @@ def has_metadata(self) -> bool:
         :return: True if it has metadata else False
         """
 
-        return True if len(self._metadata_getter) > 0 else False
\ No newline at end of file
+        return True if self.number_of_metadata > 0 else False
+
+    def has_source(self, source: str) -> bool:
+        """Check if the source is in the metadata feature
+
+        :param source: name of the source
+
+        :return: True if the source is in the metadata feature else False
+        """
+        return True if source in self._metadata_getter else False
diff --git a/ppanggolin/pangenome.py b/ppanggolin/pangenome.py
index d61873a3..37cb3b84 100644
--- a/ppanggolin/pangenome.py
+++ b/ppanggolin/pangenome.py
@@ -827,15 +827,15 @@ def metadata_sources(self, metatype: str) -> Set[str]:
     def metadata(self, metatype: str) -> Generator[Metadata, None, None]:
         """Create a generator with all metadatas in the pangenome
 
-		:param metatype: Select to which pangenome element metadata should be generate
+        :param metatype: Select to which pangenome element metadata should be generate
 
         :return: Set of metadata source
         """
         for elem in self.select_elem(metatype):
             yield elem.metadata
 
-    def get_elem_by_metadata(self, metatype: str, **kwargs) -> Generator[
-        Union[GeneFamily, Gene, Organism, Region, Spot, Module], None, None]:
+    def get_elem_by_metadata(self, metatype: str, **kwargs
+                             ) -> Generator[Union[GeneFamily, Gene, Organism, Region, Spot, Module], None, None]:
         """Get element in pangenome with metadata attribute expected
 
         :param metatype: Select to which pangenome element metadata
@@ -847,16 +847,16 @@ def get_elem_by_metadata(self, metatype: str, **kwargs) -> Generator[
             if len(list(elem.get_metadata_by_attribute(**kwargs))) > 0:
                 yield elem
 
-    def get_elem_by_sources(self, source: List[str],
-                            metatype: str) -> Generator[Union[GeneFamily, Gene, Contig, Organism,
-                                                              Region, Spot, Module], None, None]:
-        """ Get gene famlies with a specific source in pangenome
+    def get_elem_by_source(self, source: str, metatype: str
+                           ) -> Generator[Union[GeneFamily, Gene, Contig, Organism, Region, Spot, Module], None, None]:
+        """ Get gene families with a specific source in pangenome
 
         :param source: Name of the source
+        :param metatype: select to which pangenome element metadata should be written
 
         :return: Gene families with the source
         """
         assert metatype in ["families", "genomes", "contigs", "genes", "RGPs", "spots", "modules"]
         for elem in self.select_elem(metatype):
-            if elem.get_metadata_by_source(source) is not None:
+            if elem.has_source(source):
                 yield elem
diff --git a/ppanggolin/projection/projection.py b/ppanggolin/projection/projection.py
index 08c0da5f..8a734201 100644
--- a/ppanggolin/projection/projection.py
+++ b/ppanggolin/projection/projection.py
@@ -536,21 +536,10 @@ def annotate_input_genes_with_pangenome_families(pangenome: Pangenome, input_org
 
     :return: Number of genes that do not cluster with any of the gene families of the pangenome.
     """
-    seq_fasta_files = []
-
     logging.getLogger('PPanGGOLiN').info('Writing gene sequences of input genomes.')
 
     input_genes = [gene for org in input_organisms for gene in org.genes]
 
-    # for input_organism in input_organisms:
-    #     seq_outdir = output / input_organism.name
-    #     mk_outdir(seq_outdir, force=True)
-    #
-    #     seq_fasta_file = seq_outdir / "cds_sequences.fasta"
-    #
-    #     write_gene_sequences_from_annotations(input_organism.genes, seq_fasta_file, disable_bar=True, add="ppanggolin_")
-    #
-    #     seq_fasta_files.append(seq_fasta_file)
     seq_fasta_file = output / 'input_genes.fasta'
 
     write_gene_sequences_from_annotations(input_genes, seq_fasta_file, disable_bar=True, add='ppanggolin_')
@@ -558,17 +547,16 @@ def annotate_input_genes_with_pangenome_families(pangenome: Pangenome, input_org
     with create_tmpdir(main_dir=tmpdir, basename="projection_tmp", keep_tmp=keep_tmp) as new_tmpdir:
         if use_representatives:
             _, seqid_to_gene_family = get_input_seq_to_family_with_rep(pangenome=pangenome,
-                                                                       sequence_files=seq_fasta_file,
-                                                                       output=new_tmpdir, tmpdir=new_tmpdir,
-                                                                       is_input_seq_nt=True, is_input_slf=True,
-                                                                       cpu=cpu, no_defrag=no_defrag,
+                                                                       sequence_files=seq_fasta_file, output=new_tmpdir,
+                                                                       tmpdir=new_tmpdir, input_type="nucleotide",
+                                                                       is_input_slf=True, cpu=cpu, no_defrag=no_defrag,
                                                                        identity=identity, coverage=coverage,
                                                                        translation_table=translation_table)
         else:
             _, seqid_to_gene_family = get_input_seq_to_family_with_all(pangenome=pangenome,
                                                                        sequence_files=seq_fasta_file,
                                                                        output=new_tmpdir, tmpdir=new_tmpdir,
-                                                                       is_input_seq_nt=True, is_input_slf=True,
+                                                                       input_type="nucleotide", is_input_slf=True,
                                                                        cpu=cpu, no_defrag=no_defrag, identity=identity,
                                                                        coverage=coverage,
                                                                        translation_table=translation_table,
diff --git a/ppanggolin/utils.py b/ppanggolin/utils.py
index 1784f0f6..f9d9e839 100755
--- a/ppanggolin/utils.py
+++ b/ppanggolin/utils.py
@@ -586,8 +586,8 @@ def combine_args(args: argparse.Namespace, another_args: argparse.Namespace):
     return args
 
 
-def get_args_that_different_from_default(default_args: argparse.Namespace, final_args: argparse.Namespace,
-                                         param_to_ignore: Union[List[str], Set[str]] = None) -> dict:
+def get_args_differing_from_default(default_args: argparse.Namespace, final_args: argparse.Namespace,
+                                    param_to_ignore: Union[List[str], Set[str]] = None) -> dict:
     """
     Get the parameters that have different value than default values.
 
@@ -665,7 +665,7 @@ def manage_cli_and_config_args(subcommand: str, config_file: str, subcommand_to_
     # cli > config > default
 
     args = overwrite_args(default_args, config_args, cli_args)
-    params_that_differ = get_args_that_different_from_default(default_args, args, input_params)
+    params_that_differ = get_args_differing_from_default(default_args, args, input_params)
 
     if params_that_differ:
         params_that_differ_str = ', '.join([f'{p}={v}' for p, v in params_that_differ.items()])
@@ -706,7 +706,7 @@ def manage_cli_and_config_args(subcommand: str, config_file: str, subcommand_to_
 
             step_args = overwrite_args(default_step_args, config_step_args, cli_args)
 
-            step_params_that_differ = get_args_that_different_from_default(default_step_args, step_args)
+            step_params_that_differ = get_args_differing_from_default(default_step_args, step_args)
 
             if step_params_that_differ:
                 step_params_that_differ_str = ', '.join([f'{p}={v}' for p, v in step_params_that_differ.items()])
@@ -1189,6 +1189,16 @@ def get_consecutive_region_positions(region_positions: List[int], contig_gene_co
 
 
 def run_subprocess(cmd: List[str], output: Path = None, msg: str = "Subprocess failed with the following error:\n"):
+    """Run a subprocess command and write the output to the given path.
+
+    :param cmd: list of program arguments
+    :param output: path to write the subprocess output
+    :param msg: message to print if the subprocess fails
+
+    :return:
+
+    :raises subprocess.CalledProcessError: raise when the subprocess return a non-zero exit code
+    """
     logging.getLogger("PPanGGOLiN").debug(" ".join(cmd))
     try:
         result = subprocess.run(cmd, check=True, capture_output=True, text=True)
@@ -1199,4 +1209,4 @@ def run_subprocess(cmd: List[str], output: Path = None, msg: str = "Subprocess f
     else:
         if output is not None:
             with open(output, 'w') as fout:
-                fout.write(result.stdout)
\ No newline at end of file
+                fout.write(result.stdout)
diff --git a/ppanggolin/workflow/all.py b/ppanggolin/workflow/all.py
index da91697b..3b2bc937 100644
--- a/ppanggolin/workflow/all.py
+++ b/ppanggolin/workflow/all.py
@@ -64,9 +64,9 @@ def launch_workflow(args: argparse.Namespace, panrgp: bool = True,
 
         if args.clusters is not None:
             start_clust = time.time()
-            print(args.cluster)
-            read_clustering(pangenome, args.clusters, disable_bar=args.disable_prog_bar,
-                            infer_singleton=args.cluster.infer_singletons)
+            read_clustering(pangenome, args.clusters, infer_singleton=args.cluster.infer_singletons,
+                            code=args.cluster.translation_table, cpu=args.cluster.cpu, tmpdir=args.tmpdir,
+                            keep_tmp=args.cluster.keep_tmp, force=args.force, disable_bar=args.disable_prog_bar)
         else:  # args.cluster is None
             if pangenome.status["geneSequences"] == "No":
                 if args.fasta is None:
@@ -80,7 +80,7 @@ def launch_workflow(args: argparse.Namespace, panrgp: bool = True,
                        disable_bar=args.disable_prog_bar,
                        defrag=not args.cluster.no_defrag, code=args.cluster.translation_table,
                        coverage=args.cluster.coverage, identity=args.cluster.identity, mode=args.cluster.mode,
-                       keep_tmp_files=True)
+                       keep_tmp_files=args.cluster.keep_tmp)
         clust_time = time.time() - start_clust
 
     elif args.fasta is not None:
diff --git a/pyproject.toml b/pyproject.toml
index abdbf2d7..7864f533 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -81,9 +81,3 @@ packages = ["ppanggolin"]
 
 [tool.setuptools.dynamic]
 version = {file = "VERSION"}
-
-[tool.pytest.ini_options]
-markers = [
-    "nucleotides: marks tests to run with nucleotides alphabets",
-    "aminoacids: marks tests to run with amino-acids alphabets",
-]
\ No newline at end of file
diff --git a/tests/align/test_align.py b/tests/align/test_align.py
index e5ce2dde..42d1cd5f 100644
--- a/tests/align/test_align.py
+++ b/tests/align/test_align.py
@@ -6,106 +6,77 @@
 
 
 @pytest.fixture
-def nucleotides():
-    yield ['A', 'C', 'G', 'T', 'N']
+def single_line_fasta_nt() -> List:
+            
+    return ['>Gene_1 seq_description\n',
+            'ATGCGTTGTCGTTG\n',
+            ">Gene_2\n",
+            "TGTGACCTGCT\n"
+            ]
 
 @pytest.fixture
-def aminoacids():
-    amino_acid_alphabet = [
-        'A',  # Alanine
-        'R',  # Arginine
-        'N',  # Asparagine
-        'D',  # Aspartic acid
-        'C',  # Cysteine
-        'E',  # Glutamic acid
-        'Q',  # Glutamine
-        'G',  # Glycine
-        'H',  # Histidine
-        'I',  # Isoleucine
-        'L',  # Leucine
-        'K',  # Lysine
-        'M',  # Methionine
-        'F',  # Phenylalanine
-        'P',  # Proline
-        'S',  # Serine
-        'T',  # Threonine
-        'W',  # Tryptophan
-        'Y',  # Tyrosine
-        'V'  # Valine
-    ]
-    yield amino_acid_alphabet
+def single_line_fasta_aa() -> List:
+            
+    return ['>Gene_1 seq_description\n',
+            'YWTPRPFFYAAEYNN\n',
+            ">Gene_2\n",
+            "YWTPRPSYWTPAAEYNN\n"
+            ]
 
 @pytest.fixture
-def number_of_sequences():
-    yield randint(4, 10)
-@pytest.fixture
-def single_line_fasta(request, tmp_path_factory: pytest.TempPathFactory, number_of_sequences,
-                      aminoacids: List[str], nucleotides: List[str]):
-    if request.node.get_closest_marker('aminoacids'):
-        alphabet = aminoacids
-        fasta_path = tmp_path_factory.getbasetemp() / "single_line_nt.fasta"
-    else:
-        alphabet = nucleotides
-        fasta_path = tmp_path_factory.getbasetemp() / "single_line_aa.fasta"
-    with open(fasta_path, "w") as fasta:
-        for i in range(number_of_sequences):
-            fasta.write(f">Gene_{i}\n")
-            fasta.write("".join([choice(alphabet) for _ in range(0, randint(30, 100))]))
-            fasta.write("\n")
-    yield fasta_path
+def multi_line_fasta_nt() -> List:
+            
+    return ['>Gene_1 seq_description\n',
+            'ATGCGT\n',
+            'TGTCGTTG\n',
+            ">Gene_2\n",
+            "TGTGACCTGCT\n"
+            ]
 
 @pytest.fixture
-def multi_line_fasta(request, tmp_path_factory: pytest.TempPathFactory, number_of_sequences,
-                     aminoacids: List[str], nucleotides: List[str]):
-    if request.node.get_closest_marker('aminoacids'):
-        alphabet = aminoacids
-        fasta_path = tmp_path_factory.getbasetemp() / "single_line_nt.fasta"
-    else:
-        alphabet = nucleotides
-        fasta_path = tmp_path_factory.getbasetemp() / "single_line_aa.fasta"
-    with open(fasta_path, "w") as fasta:
-        for i in range(number_of_sequences):
-            fasta.write(f">Gene_{i}\n")
-            for j in range(randint(4,10)):
-                fasta.write("".join([choice(alphabet) for _ in range(60)]))
-                fasta.write("\n")
-    yield fasta_path
-
-@pytest.mark.nucleotides
-def test_get_seq_ids_single_line_nt(number_of_sequences, single_line_fasta):
-    with open(single_line_fasta, "r") as fasta:
-        seq_set, is_nucleotide, single_line_fasta = get_seq_ids(fasta)
-        assert len(seq_set) == number_of_sequences
-        assert seq_set == {f"Gene_{i}" for i in range(number_of_sequences)}
-        assert is_nucleotide
-        assert single_line_fasta
-
-@pytest.mark.aminoacids
-def test_get_seq_ids_single_line_aa(number_of_sequences, single_line_fasta):
-    with open(single_line_fasta, "r") as fasta:
-        seq_set, is_nucleotide, single_line_fasta = get_seq_ids(fasta)
-        assert len(seq_set) == number_of_sequences
-        assert seq_set == {f"Gene_{i}" for i in range(number_of_sequences)}
-        assert not is_nucleotide
-        assert single_line_fasta
-
-@pytest.mark.nucleotides
-def test_get_seq_ids_multi_line_nt(number_of_sequences, multi_line_fasta):
-    with open(multi_line_fasta, "r") as fasta:
-        seq_set, is_nucleotide, single_line_fasta = get_seq_ids(fasta)
-        assert len(seq_set) == number_of_sequences
-        assert seq_set == {f"Gene_{i}" for i in range(number_of_sequences)}
-        assert is_nucleotide
-        assert not single_line_fasta
-
-@pytest.mark.aminoacids
-def test_get_seq_ids_multi_line_aa(number_of_sequences, multi_line_fasta):
-    with open(multi_line_fasta, "r") as fasta:
-        seq_set, is_nucleotide, single_line_fasta = get_seq_ids(fasta)
-        assert len(seq_set) == number_of_sequences
-        assert seq_set == {f"Gene_{i}" for i in range(number_of_sequences)}
-        assert not is_nucleotide
-        assert not single_line_fasta
+def multi_line_fasta_aa() -> List:
+            
+    return ['>Gene_1 seq_description\n',
+            'AAEYNN\n',
+            'YWTPRPFFY\n',
+            ">Gene_2\n",
+            "YWTPRPS\n",
+            "YWTPAAEYNN\n"
+            ]
+
+
+def test_get_seq_ids_single_line_nt(single_line_fasta_nt):
+    
+    
+    seq_set, is_nucleotide, single_line_fasta = get_seq_ids(single_line_fasta_nt)
+    assert len(seq_set) == 2
+    assert seq_set == {'Gene_1', 'Gene_2'}
+    assert is_nucleotide
+    assert single_line_fasta
+
+def test_get_seq_ids_single_line_aa(single_line_fasta_aa):
+    seq_set, is_nucleotide, single_line_fasta = get_seq_ids(single_line_fasta_aa)
+    assert len(seq_set) == 2
+    assert seq_set == {'Gene_1', 'Gene_2'}
+    assert not is_nucleotide
+    assert single_line_fasta
+
+def test_get_seq_ids_multi_line_nt(multi_line_fasta_nt):
+
+    seq_set, is_nucleotide, single_line_fasta = get_seq_ids(multi_line_fasta_nt)
+
+    assert len(seq_set) == 2
+    assert seq_set == {'Gene_1', 'Gene_2'}
+    assert is_nucleotide
+    assert not single_line_fasta
+
+def test_get_seq_ids_multi_line_aa(multi_line_fasta_aa):
+
+    seq_set, is_nucleotide, single_line_fasta = get_seq_ids(multi_line_fasta_aa)
+    assert len(seq_set) == 2
+    assert seq_set == {'Gene_1', 'Gene_2'}
+    assert not is_nucleotide
+    assert not single_line_fasta
 
 
 
diff --git a/tests/pytest.ini b/tests/pytest.ini
new file mode 100644
index 00000000..393e1386
--- /dev/null
+++ b/tests/pytest.ini
@@ -0,0 +1,4 @@
+[pytest]
+markers =
+	nucleotides: marks tests to run with nucleotides alphabets
+	aminoacids: marks tests to run with amino-acids alphabets
\ No newline at end of file
diff --git a/tests/test_metadata.py b/tests/test_metadata.py
index d4bab2e4..95928b05 100644
--- a/tests/test_metadata.py
+++ b/tests/test_metadata.py
@@ -83,24 +83,24 @@ def metafeatures(self, metadata) -> Generator[MetaFeatures, None, None]:
         """
         metafeatures = MetaFeatures()
         for meta in metadata:
-            metafeatures.add_metadata(meta.source, meta)
+            metafeatures.add_metadata(meta)
         yield metafeatures
 
     def test_add_metadata(self, metafeatures, metadata):
         """Tests that metadata can be added to the metadata getter
         """
-        assert all(metafeatures._metadata_getter[meta.source] == [meta] for meta in metadata)
+        assert all(list(metafeatures._metadata_getter[meta.source].values()) == [meta] for meta in metadata)
 
     def test_get_metadata_feature_corresponding_to_source(self, metafeatures, metadata):
         """Tests that all the metadata features corresponding to a source can be retrieved
         """
-        assert all(metafeatures.get_metadata_by_source(meta.source) == [meta] for meta in metadata)
+        assert all(list(metafeatures.get_metadata_by_source(meta.source).values()) == [meta] for meta in metadata)
 
     def test_remove_source_from_feature(self, metafeatures):
         """Tests that a source can be removed from the feature
         """
         metadata = Metadata("source_del", attribute1="value")
-        metafeatures.add_metadata("source_del", metadata)
+        metafeatures.add_metadata(metadata)
         metafeatures.del_metadata_by_source("source_del")
         assert metafeatures.get_metadata_by_source("source_del") is None
 
@@ -114,7 +114,7 @@ def test_get_metadata_by_attribute_values(self, metafeatures):
         """
         meta = Metadata("source_test", attribute1="value_to_retrieve")
         # meta_list = Metadata("source_list", attribute1=["val_1", "val_2"])
-        metafeatures.add_metadata(meta.source, meta)
+        metafeatures.add_metadata(meta)
         # metafeatures[meta_list.source] = meta_list
         assert list(metafeatures.get_metadata_by_attribute(attribute1="value_to_retrieve")) == [meta]
         # assert list(metafeatures.get_metadata(attribute1="val_1")) == [meta_list]
@@ -124,26 +124,12 @@ def test_get_maximum_number_of_metadata_for_one_source(self, metafeatures, metad
         """
         metadata1 = Metadata("source_max", attribute1="value1")
         metadata2 = Metadata("source_max", attribute2="value2")
-        metafeatures.add_metadata("source_max", metadata1)
-        metafeatures.add_metadata("source_max", metadata2)
+        metafeatures.add_metadata(metadata1)
+        metafeatures.add_metadata(metadata2)
         assert metafeatures.max_metadata_by_source() == ("source_max", 2)
 
     def test_metadata_is_not_with_type_metadata(self, metafeatures):
         """Tests that an AssertionError is raised when metadata is not with type Metadata
         """
         with pytest.raises(AssertionError):
-            metafeatures.add_metadata("source1", "not_metadata")
-
-    def test_source_is_not_a_string(self, metafeatures):
-        """Tests that an AssertionError is raised when the source is not a string
-        """
-
-        metadata = Metadata("source1", attribute1="value1")
-        with pytest.raises(AssertionError):
-            metafeatures.add_metadata(1, metadata)
-
-    def test_source_or_metadata_is_not_with_correct_type(self, metafeatures, metadata):
-        """Tests that an AssertionError is raised when the source or metadata is not with the correct type
-        """
-        with pytest.raises(AssertionError):
-            metafeatures.add_metadata(1, "not_metadata")
+            metafeatures.add_metadata("not_metadata")
diff --git a/tests/test_pangenome.py b/tests/test_pangenome.py
index d67eb1ce..90483079 100644
--- a/tests/test_pangenome.py
+++ b/tests/test_pangenome.py
@@ -804,25 +804,25 @@ def add_element_to_pangenome(self, pangenome):
         """
         metadata = Metadata(source="source", attribute="attr")
         family = GeneFamily(family_id=pangenome.max_fam_id, name="Fam")
-        family.add_metadata(source=metadata.source, metadata=metadata)
+        family.add_metadata(metadata=metadata)
         pangenome.add_gene_family(family)
         org = Organism("Org")
-        org.add_metadata(source=metadata.source, metadata=metadata)
+        org.add_metadata(metadata=metadata)
         ctg = Contig(0, "Ctg")
         org.add(ctg)
         gene = Gene("Gene")
-        gene.position, gene.start = (0, 0)
-        gene.add_metadata(source=metadata.source, metadata=metadata)
-        ctg[gene.start] = gene
+        gene.fill_annotations(start=0, stop=100, position=0, strand='+')
+        gene.add_metadata(metadata=metadata)
+        ctg.add(gene)
         pangenome.add_organism(org)
         rgp = Region("RGP")
-        rgp.add_metadata(source=metadata.source, metadata=metadata)
+        rgp.add_metadata(metadata=metadata)
         pangenome.add_region(rgp)
         spot = Spot(0)
-        spot.add_metadata(source=metadata.source, metadata=metadata)
+        spot.add_metadata(metadata=metadata)
         pangenome.add_spot(spot)
         module = Module(0)
-        module.add_metadata(source=metadata.source, metadata=metadata)
+        module.add_metadata(metadata=metadata)
         pangenome.add_module(module)
 
     def test_select_elem(self, add_element_to_pangenome, pangenome):
@@ -876,7 +876,7 @@ def test_get_elem_by_metadata(self, add_element_to_pangenome, pangenome):
                     assert isinstance(metadata, Metadata)
                     assert metadata.source == 'source'
 
-    def test_get_elem_by_sources(self, add_element_to_pangenome, pangenome):
+    def test_get_elem_by_source(self, add_element_to_pangenome, pangenome):
         """Tests the metadata generator filtered by source of the Pangenome class.
 
         :param add_element_to_pangenome: Add elements to the pangenome
@@ -884,7 +884,7 @@ def test_get_elem_by_sources(self, add_element_to_pangenome, pangenome):
         """
         for metatype, expected_type in {"families": GeneFamily, "genomes": Organism, "genes": Gene, "RGPs": Region,
                                         "spots": Spot, "modules": Module}.items():
-            for elem in pangenome.get_elem_by_sources(source='source', metatype=metatype):
+            for elem in pangenome.get_elem_by_source(source='source', metatype=metatype):
                 assert isinstance(elem, expected_type)
                 for metadata in elem.metadata:
                     assert isinstance(metadata, Metadata)