diff --git a/analysis-logs/README.md b/analysis-logs/README.md new file mode 100644 index 00000000..ae9cfa1d --- /dev/null +++ b/analysis-logs/README.md @@ -0,0 +1,13 @@ +# Instructions + +Ensure that you have obtained `analysis-logs/input/access.log`. + +The script `analysis-logs/input.sh` downloads this file with the Kaggle API. In +order to use the Kaggle API, you must have the API token: `~/.kaggle/kaggle.json`. + +From the Kaggle public API documentation: + +> In order to use the Kaggle’s public API, you must first authenticate using an +> API token. Go to the 'Account' tab of your user profile and select 'Create New +> Token'. This will trigger the download of kaggle.json, a file containing your +> API credentials. diff --git a/analysis-logs/input/cleanup.sh b/analysis-logs/cleanup.sh similarity index 69% rename from analysis-logs/input/cleanup.sh rename to analysis-logs/cleanup.sh index a6f4207f..27bee4a3 100755 --- a/analysis-logs/input/cleanup.sh +++ b/analysis-logs/cleanup.sh @@ -1,7 +1,7 @@ #!/bin/bash REPO_TOP=$(git rev-parse --show-toplevel) -results_dir="${REPO_TOP}/covid-mts/results" +results_dir="${REPO_TOP}/analysis-logs/results" echo "Cleaning up outputs..." 
rm -rf $results_dir diff --git a/analysis-logs/hashes/results.full.md5sum b/analysis-logs/hashes/results.full.md5sum new file mode 100644 index 00000000..4a597a90 --- /dev/null +++ b/analysis-logs/hashes/results.full.md5sum @@ -0,0 +1 @@ +105b333d6b49399cc10763a5f629343a results.full/out diff --git a/analysis-logs/hashes/results.small.md5sum b/analysis-logs/hashes/results.small.md5sum new file mode 100644 index 00000000..cfdd4445 --- /dev/null +++ b/analysis-logs/hashes/results.small.md5sum @@ -0,0 +1 @@ +105b333d6b49399cc10763a5f629343a results.small/out diff --git a/analysis-logs/input.sh b/analysis-logs/input.sh new file mode 100755 index 00000000..1995781b --- /dev/null +++ b/analysis-logs/input.sh @@ -0,0 +1,18 @@ +#!/bin/bash +# Download the eliasdabbas/web-server-access-logs Kaggle dataset into analysis-logs/input. +REPO_TOP=$(git rev-parse --show-toplevel) +DIR="$REPO_TOP/analysis-logs/input" +mkdir -p "$DIR" + +# Set up Kaggle API +if [[ ! -f ~/.kaggle/kaggle.json ]]; then + mkdir -p ~/.kaggle + echo "Place your kaggle.json in the ~/.kaggle directory." + exit 1 # the download below cannot authenticate without the API token +fi +chmod 600 ~/.kaggle/kaggle.json + +cd "$DIR" || exit 1 # never unzip or rm in the caller's directory +kaggle datasets download -d eliasdabbas/web-server-access-logs || exit 1 +unzip web-server-access-logs +rm -f web-server-access-logs.zip client_hostname.csv diff --git a/analysis-logs/input/.gitignore b/analysis-logs/input/.gitignore new file mode 100644 index 00000000..8b137891 --- /dev/null +++ b/analysis-logs/input/.gitignore @@ -0,0 +1 @@ + diff --git a/analysis-logs/input/.kaggle/kaggle.json b/analysis-logs/input/.kaggle/kaggle.json deleted file mode 100644 index fa03de64..00000000 --- a/analysis-logs/input/.kaggle/kaggle.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "username":"ADD YOUR KAGGLE USERNAME", - "key":"ADD YOUR KAGGLE API KEY" -} diff --git a/analysis-logs/input/checksum.md5 b/analysis-logs/input/checksum.md5 deleted file mode 100644 index ad952be9..00000000 --- a/analysis-logs/input/checksum.md5 +++ /dev/null @@ -1 +0,0 @@ -3fe6814c6d6f2edd73a83c35f45aa024 results/nginx.sh.out diff --git a/analysis-logs/input/input.sh b/analysis-logs/input/input.sh deleted file 
mode 100755 index af67bb26..00000000 --- a/analysis-logs/input/input.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -REPO_TOP=$(git rev-parse --show-toplevel) -DIR=$REPO_TOP/analysis-logs/input -mkdir -p $DIR -cd $DIR - -if [[ $1 == "--kaggle" ]]; then - # Set up Kaggle API - if [[ ! -d ~/.kaggle ]]; then - mkdir ~/.kaggle - echo "Place your kaggle.json in the ~/.kaggle directory." - fi - chmod 600 ~/.kaggle/kaggle.json - - if [[ ! -f nginx.zip ]]; then - kaggle datasets download -d eliasdabbas/web-server-access-logs - unzip web-server-access-logs - rm -f web-server-access-logs.zip client_hostname.csv - fi -else - if [[ ! -f nginx.zip ]]; then - # TODO: replace with omega URL - # wget -O nginx.zip "https://dataverse.harvard.edu/api/access/datafile/:persistentId?persistentId=doi:10.7910/DVN/3QBYB5/NXKB6J" - # unzip web-server-access-logs - # rm -f web-server-access-logs.zip - echo "Not implemented yet." - exit 1 - fi -fi diff --git a/analysis-logs/input/run.sh b/analysis-logs/input/run.sh deleted file mode 100755 index 20fdf685..00000000 --- a/analysis-logs/input/run.sh +++ /dev/null @@ -1,17 +0,0 @@ -#!/bin/bash - -REPO_TOP=$(git rev-parse --show-toplevel) - -eval_dir="${REPO_TOP}/analysis-logs" -results_dir="${eval_dir}/results" -inputs_dir="${eval_dir}/input" - -shell="/bin/bash" - -mkdir -p $results_dir - -export INPUT=${inputs_dir}/access.log -script="${eval_dir}/nginx.sh" - -echo "Executing $(basename "$script")" -$shell "$script" > "$results_dir/$(basename "$script").out" diff --git a/analysis-logs/input/verify.sh b/analysis-logs/input/verify.sh deleted file mode 100755 index b8a4a60c..00000000 --- a/analysis-logs/input/verify.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/bin/bash - -REPO_TOP=$(git rev-parse --show-toplevel) - -eval_dir="${REPO_TOP}/analysis-logs/" -results_dir="${eval_dir}/results" -input_dir="${eval_dir}/input" - -if [ "$(md5sum $results_dir/* | awk '{print $1}')" == "$(cat $input_dir/checksum.md5 | awk '{print $1}')" ]; -then - echo "Valid" 
-else - echo "Invalid" - exit 1 -fi diff --git a/analysis-logs/run.sh b/analysis-logs/run.sh new file mode 100755 index 00000000..d6617436 --- /dev/null +++ b/analysis-logs/run.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Run scripts/nginx.sh with INPUT=input/access.log; output goes to results/results.full/out, or results/results.small/out with --small. +REPO_TOP=$(git rev-parse --show-toplevel) + +eval_dir="${REPO_TOP}/analysis-logs" +results_dir="${eval_dir}/results" +scripts_dir="${eval_dir}/scripts" +input_dir="${eval_dir}/input" +mkdir -p "$results_dir" + +export INPUT="${input_dir}/access.log" + +suffix=".full" +if [[ "$*" == *"--small"* ]]; then + suffix=".small" +fi + +log_dir="$results_dir/results$suffix" +mkdir -p "$log_dir" +"$scripts_dir/nginx.sh" > "$log_dir/out" diff --git a/analysis-logs/nginx.sh b/analysis-logs/scripts/nginx.sh old mode 100644 new mode 100755 similarity index 100% rename from analysis-logs/nginx.sh rename to analysis-logs/scripts/nginx.sh diff --git a/analysis-logs/verify.sh b/analysis-logs/verify.sh new file mode 100755 index 00000000..1851b365 --- /dev/null +++ b/analysis-logs/verify.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# Check results/results<suffix>/* against hashes/results<suffix>.md5sum; --small selects the .small set, --generate rewrites the hash file first. +REPO_TOP=$(git rev-parse --show-toplevel) + +eval_dir="${REPO_TOP}/analysis-logs" +input_dir="${eval_dir}/input" +hashes_dir="${eval_dir}/hashes" +results_dir="${eval_dir}/results" +mkdir -p "$results_dir" + +suffix=".full" +if [[ "$*" == *"--small"* ]]; then + suffix=".small" +fi + +cd "$results_dir" || exit 1 # md5sum computes paths relative to cd +if [[ "$*" == *"--generate"* ]]; then + md5sum "results$suffix"/* > "$hashes_dir/results$suffix.md5sum" +fi + +okay=0 +if ! md5sum --check --quiet "$hashes_dir/results$suffix.md5sum"; then + okay=1 + echo "analysis-logs $suffix failed verification" +fi +exit $okay