Skip to content

Commit

Permalink
feat: exclude rows without browser urls from catalog input (#17)
Browse files Browse the repository at this point in the history
  • Loading branch information
hunterckx committed Sep 3, 2024
1 parent 06b3181 commit d52ab60
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 2,776 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ export interface BRCDataCatalogGenome {
species: string;
strain: string;
supercontigs: number;
ucscBrowserUrl: string | null;
ucscBrowserUrl: string;
vEuPathDbProject: string;
}
6 changes: 1 addition & 5 deletions data-catalog/files/build-catalog.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ async function buildGenomes(): Promise<BRCDataCatalogGenome[]> {
species: row.Species,
strain: row.Strain,
supercontigs: parseNumber(row.Supercontigs),
ucscBrowserUrl: parseStringOrNull(row.ucscBrowser),
ucscBrowserUrl: row.ucscBrowser,
vEuPathDbProject: row["VEuPathDB Project"],
})
);
Expand All @@ -52,10 +52,6 @@ async function saveJson(filePath: string, data: unknown): Promise<void> {
await fsp.writeFile(filePath, JSON.stringify(data, undefined, 2) + "\n");
}

/**
 * Normalizes a possibly-empty string into a nullable value.
 *
 * @param value - Raw string to normalize.
 * @returns `value` unchanged when it is truthy; `null` when it is falsy
 *   (for a string, that means the empty string).
 */
function parseStringOrNull(value: string): string | null {
  // Guard clause: any falsy input collapses to null.
  if (!value) {
    return null;
  }
  return value;
}

function parseNumber(value: string): number {
value = value.trim();
const n = Number(value);
Expand Down
2 changes: 1 addition & 1 deletion data-catalog/files/build-genomes-files.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def build_genomes_files():
gen_bank_merge_df = genomes_source_df.merge(assemblies_df, how="left", left_on="Genome Version/Assembly ID", right_on="genBank")
ref_seq_merge_df = genomes_source_df.merge(assemblies_df, how="left", left_on="Genome Version/Assembly ID", right_on="refSeq")

result_df = gen_bank_merge_df.combine_first(ref_seq_merge_df)
result_df = gen_bank_merge_df.combine_first(ref_seq_merge_df).dropna(subset=["ucscBrowser"])

result_df.to_csv(OUTPUT_PATH, index=False, sep="\t")

Expand Down
Loading

0 comments on commit d52ab60

Please sign in to comment.