Skip to content

Commit

Permalink
fix compute and visualize
Browse files (browse the repository at this point in the history)
  • Loading branch information
Raiduy committed Jun 5, 2023
1 parent c7fa9a2 commit 657a75a
Show file tree
Hide file tree
Showing 5 changed files with 23 additions and 31 deletions.
9 changes: 7 additions & 2 deletions packages/compute/compute.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
import os


def compute(data_path, coli_path):
def compute(data_path):
pd.options.mode.chained_assignment = None
dataset = pd.read_csv(f"{data_path}/dataset.csv")
coli = pd.read_csv(f"{data_path}/coli.csv")
us_sdr = pd.read_csv(f"{data_path}/us_sdr.csv")

dataset = clean_data(dataset)
# dataset = clean_data(dataset)

dataset['State'] = dataset['Location'].apply(get_state)
dataset['City'] = dataset['Location'].apply(lambda x: x.split(',')[0])
Expand All @@ -34,6 +34,11 @@ def compute(data_path, coli_path):
# generic data cleaning
def clean_data(df_path):
df = pd.read_csv(f'{df_path}/dataset.csv')
coli = pd.read_csv(f"{df_path}/coli.csv")
us_sdr = pd.read_csv(f"{df_path}/us_sdr.csv")

coli.to_csv('/result/coli.csv')
us_sdr.to_csv('/result/us_sdr.csv')

df.drop(df.columns[0], axis=1, inplace=True)
df['Salary Estimate'] = df['Salary Estimate'].apply(lambda x: x.split('(')[0])
Expand Down
4 changes: 1 addition & 3 deletions packages/compute/container.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,7 @@ actions:
- process_data
input:
- type: IntermediateResult
name: filepath_data
- type: IntermediateResult
name: filepath_coli
name: filepath
output:
- type: IntermediateResult
name: filepath
Expand Down
2 changes: 1 addition & 1 deletion packages/compute/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def main():
command = sys.argv[1]

if command == "process_data":
dataset = compute.compute(f"{json.loads(os.environ['FILEPATH_DATA'])}", f"{json.loads(os.environ['FILEPATH_COLI'])}")
dataset = compute.compute(f"{json.loads(os.environ['FILEPATH'])}")
return

run_action(command, json.loads(os.environ['FILEPATH']))
Expand Down
2 changes: 1 addition & 1 deletion packages/visualization/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def visualization_action(
average_adjusted_salary_by_industry_img = plot_average_adjusted_salary_by_industry(data_analyst_jobs)
average_combined_rank_per_location_img = plot_average_combined_rank_per_location(data_analyst_jobs)
adjusted_and_rank_img = plot_adjutsed_and_rank(data_analyst_jobs)
template_html = codecs.open("packages/visualization/result.html", "r", "utf-8")
template_html = codecs.open("./result.html", "r", "utf-8")

result = template_html.read().format(
salary_estimate=salary_estimate_img,
Expand Down
37 changes: 13 additions & 24 deletions pipeline.bs
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,19 @@
import compute;
import visualization;

println("Cleaning data...");
let cleaned := clean_data(new Data { name := "job_data" });
commit_result("cleaned_data", cleaned);
// dataset can be found under ~/.local/share/brane/data/cleaned_data/data/dataset.csv
println("Cleaned!");

// Change this location according to your instance
on "localhost" {

// println("Computing...");
// // !!! TODO: job_data might have to be split for coli.csv and dataset.csv, then passed as 2 arguments
// let msg := compute();
// println("Computed!");
// println(msg);
println("Processing data...");
let processed := process_data(cleaned);
commit_result("processed_data", processed);
println("Processed!");

println("Cleaning data...");
let cleaned := clean_data(new Data { name := "job_data" });
commit_result("cleaned_data", cleaned);
// dataset can be found under ~/.local/share/brane/data/cleaned_data/data/dataset.csv
println("Cleaned!");

println("Processing data...");
let processed := process_data(cleaned, new Data { name := "job_data" });
commit_result("processed_data", processed);
println("Processed!");


// Visualization
let vis := visualization_action(processed);
commit_result("visualization", vis);
return;
}
// Visualization
let vis := visualization_action(processed);
commit_result("visualization", vis);
return;

0 comments on commit 657a75a

Please sign in to comment.