diff --git a/backend/src/main.py b/backend/src/main.py index 43bca57c..0559f509 100644 --- a/backend/src/main.py +++ b/backend/src/main.py @@ -95,7 +95,7 @@ def proteins_subgraph_api(): selected_d = request.form.get("selected_d").split(",") if request.form.get("selected_d") else None threshold = int(float(request.form.get("threshold")) * 1000) - proteins, protein_ids = queries.get_protein_ids_for_names(driver, protein_names, species_id) + proteins, protein_ids, symbol_alias_mapping = queries.get_protein_ids_for_names(driver, protein_names, species_id) stopwatch.round("Setup") @@ -164,6 +164,7 @@ def proteins_subgraph_api(): ensembl_id = node["id"] df_node = ensembl_to_node.get(ensembl_id) if df_node: + symbol_value = df_node.SYMBOL if ensembl_id in node_mapping: mapped_node_id = node_mapping[ensembl_id] # Use node mapping to add corresponding values of betweenness and pagerank @@ -171,11 +172,16 @@ def proteins_subgraph_api(): node["attributes"]["PageRank"] = str(pagerank[mapped_node_id]) node["attributes"]["Description"] = df_node.annotation node["attributes"]["Ensembl ID"] = df_node.external_id - node["attributes"]["Name"] = df_node.SYMBOL + node["attributes"]["Name"] = symbol_value if not (request.files.get("file") is None): if selected_d != None: for column in selected_d: - node["attributes"][column] = panda_file.loc[panda_file["name"] == df_node.SYMBOL, column].item() + if symbol_value in symbol_alias_mapping: + # If a symbol was found through its alias we have + # to keep the alias name so the value can be taken + # from the input file correctly + symbol_value = symbol_alias_mapping[symbol_value] + node["attributes"][column] = panda_file.loc[panda_file["name"] == symbol_value, column].item() node["label"] = df_node.SYMBOL node["species"] = str(10090) diff --git a/backend/src/queries.py b/backend/src/queries.py index 6268b19f..901a34ec 100644 --- a/backend/src/queries.py +++ b/backend/src/queries.py @@ -28,7 +28,10 @@ def get_terms_connected_by_overlap(driver: 
neo4j.Driver, term_ids: list[str], sp return _convert_to_connection_info_score(result=result, _int=False, protein=False) -def get_protein_ids_for_names(driver: neo4j.Driver, names: list[str], species_id: int) -> (list, list[str]): +def get_protein_ids_for_names(driver: neo4j.Driver, names: list[str], species_id: int) -> (list, list[str], dict): + """ + Returns: protein, protein_id and a dictionary of format (Symbol: Alias) of all the symbols found from aliases + """ # unsafe parameters because otherwise this query takes 10s with neo4j for unknown reasons if species_id == 10090: species = "Mus_Musculus" @@ -44,7 +47,7 @@ def get_protein_ids_for_names(driver: neo4j.Driver, names: list[str], species_id # Retrieve all the symbols that correspond to aliases found in names with driver.session() as session: result = session.run(query) - symbols_set, aliases_set = _convert_to_symbol_alias(result) + symbols_set, aliases_set, mapping = _convert_to_symbol_alias(result) # To make less calls to the database, remove the aliases and add their corresponding symbol genes_set = set(names) result_names = list(genes_set - aliases_set) + list(symbols_set - genes_set) @@ -56,7 +59,8 @@ def get_protein_ids_for_names(driver: neo4j.Driver, names: list[str], species_id """ with driver.session() as session: result = session.run(query) - return _convert_to_protein_id(result) + protein, id = _convert_to_protein_id(result) + return protein, id, mapping def get_protein_neighbours( @@ -151,10 +155,16 @@ def _convert_to_protein_id(result: neo4j.Result) -> (list, list[str]): def _convert_to_symbol_alias(result: neo4j.Result) -> (set[str], set[str]): symbols = set() aliases = set() + mapping = {} for row in result: - symbols.add(row["symbol"]) - aliases.add(row["found_alias"]) - return symbols, aliases + symbol = row["symbol"] + alias = row["found_alias"] + symbols.add(symbol) + aliases.add(alias) + # Keys are stored title-cased, so test the title-cased symbol; the first alias seen for a symbol is kept + if symbol.title() not in mapping: +
mapping[symbol.title()] = alias.title() + return symbols, aliases, mapping def _convert_to_connection_info_score(