Skip to content

Commit

Permalink
script updates, add hash to results
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesamcl committed Jan 8, 2025
1 parent 7c19443 commit 820ff5f
Show file tree
Hide file tree
Showing 7 changed files with 55 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,7 @@ csv = "1.3.0"
lmdb-zero = "0.4.4"
bloomfilter = "1.0.13"
jemallocator = "0.5.4"
clap = { version = "4.4.11", features = ["derive"] }
clap = { version = "4.4.11", features = ["derive"] }
sha1 = "0.10.6"
hex = "0.4.3"

Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ use std::io::{Write, BufWriter};
use grebi_shared::json_lexer::{lex, JsonTokenType};
use grebi_shared::json_parser::JsonParser;
use clap::Parser;
use sha1::{Sha1, Digest};

use serde_json::Value;

Expand Down Expand Up @@ -91,6 +92,14 @@ fn main() {
json.insert("_refs".to_string(), Value::Object(refs));
json.insert("_node_ids".to_string(), Value::Array( nodeids.iter().map(|id| Value::String(id.clone())).collect()));

// sha1 not for security, just as a simple way to assign a unique
// id to the result to use to reference it in the api
//
let mut hasher = Sha1::new();
hasher.update(&line);
let hash = hasher.finalize();
json.insert("id".to_string(), Value::String(hex::encode(hash)));

writer.write_all(Value::Object(json).to_string().as_bytes()).unwrap();
writer.write_all("\n".as_bytes()).unwrap();

Expand Down
2 changes: 2 additions & 0 deletions dataload/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 6 additions & 6 deletions dataload/nextflow/load_subgraph.nf
Original file line number Diff line number Diff line change
Expand Up @@ -300,7 +300,7 @@ process create_sqlite {
errorStrategy 'retry'
maxRetries 10

publishDir "${params.out}/${params.subgraph}", overwrite: true
publishDir "${params.out}", overwrite: true

input:
val(compressed_blobs)
Expand Down Expand Up @@ -424,7 +424,7 @@ process run_materialised_queries {
time "8h"
cpus "8"

publishDir "${params.out}/${params.subgraph}", overwrite: true
publishDir "${params.out}", overwrite: true

input:
path(neo_db)
Expand Down Expand Up @@ -500,7 +500,7 @@ process add_query_metadatas_to_graph_metadata {
time "8h"
cpus "8"

publishDir "${params.out}/${params.subgraph}", overwrite: true
publishDir "${params.out}", overwrite: true

input:
path(metadata_jsons)
Expand All @@ -526,7 +526,7 @@ process csvs_to_sqlite {
time "12h"
cpus "8"

publishDir "${params.out}/${params.subgraph}", overwrite: true
publishDir "${params.out}", overwrite: true

input:
path(csvs)
Expand Down Expand Up @@ -659,7 +659,7 @@ process package_neo {
time "8h"
cpus "8"

publishDir "${params.out}${params.subgraph}", overwrite: true
publishDir "${params.out}", overwrite: true

input:
path("${params.subgraph}_neo4j")
Expand All @@ -679,7 +679,7 @@ process package_solr {
time "8h"
cpus "8"

publishDir "${params.out}/${params.subgraph}", overwrite: true
publishDir "${params.out}", overwrite: true

input:
path(cores)
Expand Down
Empty file modified dataload/scripts/check_datarelease.sh
100644 → 100755
Empty file.
31 changes: 31 additions & 0 deletions dataload/scripts/ebi_datarelease_to_ftp.sh
Original file line number Diff line number Diff line change
@@ -1,2 +1,33 @@
#!/bin/bash
#
# Publish a finished data release for a subgraph to the EBI public FTP area
# and repoint the "latest" symlink at it.
#
# Usage: ebi_datarelease_to_ftp.sh <subgraph> <datarelease_path>
#
# Must be run on a SLURM "datamover" node, since only those nodes can write
# to /nfs/ftp.

# Abort on any error (including a failed sanity check), on unset variables,
# and on failures inside pipelines — without this, a failed check would not
# stop the destructive rm -rf / copy below.
set -euo pipefail

if [ "${SLURM_JOB_PARTITION:-}" != "datamover" ]; then
    echo "Must run on a datamover node"
    exit 1
fi

if [ "$#" -ne 2 ]; then
    echo "Usage: $0 <subgraph> <datarelease_path>"
    exit 1
fi

SUBGRAPH=$1
DATARELEASE_PATH=$2
# Version string is the publication date, e.g. 2025-Jan-08.
VERSION=$(date +"%Y-%b-%d")

# Sanity-check the release contents before touching the FTP area.
# With set -e, a non-zero exit here aborts the whole script.
./check_datarelease.sh "$SUBGRAPH" "$DATARELEASE_PATH"

FTP_PATH=/nfs/ftp/public/databases/spot/kg/$SUBGRAPH/$VERSION
LATEST_PATH=/nfs/ftp/public/databases/spot/kg/$SUBGRAPH/latest

echo "Copying $DATARELEASE_PATH to $FTP_PATH"

# Ensure the versioned target directory exists (first release of the day
# won't have one), then clear any partial previous attempt.
mkdir -p "$FTP_PATH"
rm -rf "${FTP_PATH:?}"/*

# -L dereferences symlinks so the FTP copy is self-contained.
cp -Lr "$DATARELEASE_PATH"/* "$FTP_PATH"/

# -sfn: replace any existing "latest" link atomically-ish instead of
# failing (plain ln -s errors if the link exists) or creating a nested
# link inside the old target directory.
ln -sfn "$FTP_PATH" "$LATEST_PATH"
6 changes: 3 additions & 3 deletions dataload/scripts/ebi_datarelease_to_staging.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,9 @@ rm -rf $STAGING_PATH/sqlite/${SUBGRAPH}.sqlite3
echo Extracting new data release

tar --use-compress-program=pigz -xf $DATARELEASE_PATH/${SUBGRAPH}_neo4j.tgz -C $STAGING_PATH/neo4j
tar --use-compress-program=pigz -xf $DATARELEASE_PATH/${SUBGRAPH}_solr.tgz -C $STAGING_PATH/solr
cp -f $DATARELEASE_PATH/${SUBGRAPH}_metadata.json $STAGING_PATH/metadata
cp -f $DATARELEASE_PATH/${SUBGRAPH}.sqlite3 $STAGING_PATH/sqlite
tar --use-compress-program=pigz -xf $DATARELEASE_PATH/${SUBGRAPH}_solr.tgz -C $STAGING_PATH
cp -f $DATARELEASE_PATH/${SUBGRAPH}_metadata.json $STAGING_PATH/metadata/
cp -f $DATARELEASE_PATH/${SUBGRAPH}.sqlite3 $STAGING_PATH/sqlite/



Expand Down

0 comments on commit 820ff5f

Please sign in to comment.