-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathMakefile
105 lines (81 loc) · 2.94 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# ---------------------------------------------------------------------------
# Derived file lists
# ---------------------------------------------------------------------------

# One text file under procedure-codes/ for every PDF found under pdf/.
OCR_OUTPUTS := $(patsubst pdf/%.pdf,procedure-codes/%.txt,$(wildcard pdf/*.pdf))

# One cleaned file under cleaned-codes/ for every text file currently present
# under procedure-codes/.
CLEAN_CODES := $(patsubst procedure-codes/%.txt,cleaned-codes/%.txt,$(wildcard procedure-codes/*.txt))

# One stamp file under procedure-code-sections/ for every cleaned code.
# Derived from CLEAN_CODES instead of globbing cleaned-codes/: that directory
# is produced by this Makefile, so a wildcard over it is empty on a fresh
# checkout (or after `make clean-splits`) and the split/LSH targets would end
# up with no prerequisites at all.
SPLIT_CODES := $(patsubst cleaned-codes/%.txt,procedure-code-sections/%-SPLIT.txt,$(CLEAN_CODES))

# Default goal (first rule in the file): build the LSH cache, the network
# graph cache, the article PDF and the clusters.
# Declared phony so that a stray file named `all` can never mask it.
.PHONY : all
all : cache/corpus-lsh.rda \
      cache/network-graphs.rda \
      article/Funk-Mullen.Spine-of-American-Law.pdf \
      clusters

# Setup tasks
# `make setup` runs `packrat` and `dirs`. Both are listed after `|`, i.e. as
# order-only prerequisites.
.PHONY : setup packrat dirs

setup : | packrat dirs

# Run packrat::restore() in a fresh R session.
packrat :
	Rscript -e "packrat::restore()"

# Create the working directories used by the other targets.
# NOTE(review): `proc` is not referenced anywhere else in this Makefile;
# confirm whether it is still needed.
dirs :
	mkdir -p cleaned-codes proc
	mkdir -p procedure-code-sections
	mkdir -p out
	mkdir -p out/clusters
	mkdir -p out/figures
	mkdir -p out/matches
	mkdir -p cache

# Clean up the codes in `procedure-codes/`
# `make codes` builds every file listed in CLEAN_CODES.
.PHONY : codes
codes : $(CLEAN_CODES)

# Pattern rule: cleaned-codes/NAME.txt is built from procedure-codes/NAME.txt.
# The script is invoked with the prerequisite list ($^) followed by the
# target path ($@).
cleaned-codes/%.txt : procedure-codes/%.txt
	Rscript --vanilla scripts/clean-text.R $^ $@

# Split the codes into sections
# `make splits` builds every stamp file listed in SPLIT_CODES.
.PHONY : splits
splits : $(SPLIT_CODES)

# Pattern rule: run the split script on one cleaned code, then touch
# procedure-code-sections/NAME-SPLIT.txt to record that the code was processed.
# The script is only given the input path ($<); presumably it writes the
# individual section files itself (verify against scripts/split-code.R).
# The target directory is created first because `clean-splits` removes
# procedure-code-sections/ entirely and `touch` cannot create directories.
procedure-code-sections/%-SPLIT.txt : cleaned-codes/%.txt
	@mkdir -p $(@D)
	Rscript --vanilla scripts/split-code.R $<
	@touch $@

# Find the similarities in the split codes
# `make lsh` is a convenience alias for the cached LSH file.
.PHONY : lsh
lsh : cache/corpus-lsh.rda

# Rebuilt whenever any of the split stamps in SPLIT_CODES is newer.
# The script takes no arguments from make.
cache/corpus-lsh.rda : $(SPLIT_CODES)
	Rscript --vanilla scripts/corpus-lsh.R

# Create the network graph data from the split codes
# `make network` is a convenience alias for the cached network-graph file.
.PHONY : network
network : cache/network-graphs.rda

# Rebuilt whenever the LSH cache is newer. The script takes no arguments
# from make.
cache/network-graphs.rda : cache/corpus-lsh.rda
	Rscript --vanilla scripts/network-graphs.R

# Create the clusters
# `make clusters` is a convenience alias for the cluster stamp file.
.PHONY : clusters
clusters : out/clusters/DONE.txt

# out/clusters/DONE.txt is a stamp: it is touched only when the clustering
# script exits successfully (`&&`), and is rebuilt whenever the LSH cache is
# newer.
# The output directory is created first because `clean-clusters` and
# `clobber` remove out/clusters/ and `touch` cannot create directories.
out/clusters/DONE.txt : cache/corpus-lsh.rda
	@mkdir -p $(@D)
	Rscript --vanilla scripts/cluster-sections.R && \
	touch $@

# Create the article
# `make article` is a convenience alias for the rendered PDF.
.PHONY : article
article : article/Funk-Mullen.Spine-of-American-Law.pdf

# Render the R Markdown source (the first prerequisite, $<) with the random
# seed fixed at 100 and output_format = 'all'. The PDF is presumably one of
# the formats declared in the .Rmd (verify in its YAML header).
# Rebuilt when the .Rmd or either cache file is newer than the PDF.
article/Funk-Mullen.Spine-of-American-Law.pdf : article/Funk-Mullen.Spine-of-American-Law.Rmd \
                                                cache/corpus-lsh.rda \
                                                cache/network-graphs.rda
	R --slave -e "set.seed(100); rmarkdown::render('$<', output_format = 'all')"

# Update certain files in the research compendium for AHR
# Zips the match tables, the procedure codes, the split sections and the
# clusters into compendium/, then archives the `master` branch with git.
# `-j` stores entries without their directory paths; `-r` recurses into
# directories.
# This target declares no prerequisites, so the inputs under out/ must
# already exist when it is run.
.PHONY : compendium
compendium :
	zip -j compendium/all-section-matches.csv.zip out/matches/all_matches.csv
	zip -j compendium/best-section-matches.csv.zip out/matches/best_matches.csv
	zip -r compendium/procedure-codes.zip procedure-codes/
	zip -r compendium/procedure-code-sections.zip procedure-code-sections/
	zip -j -r compendium/clusters-of-sections.zip out/clusters/
	git archive --format=zip --output=compendium/field-code-analysis.zip master

# Remove everything inside temp/ (the directory itself is kept).
.PHONY : clean
clean :
	rm -rf temp/*

# Remove the cleaned codes and the split sections.
# cleaned-codes/ is emptied but kept; procedure-code-sections/ is removed
# together with the directory itself.
.PHONY : clean-splits
clean-splits :
	rm -f cleaned-codes/*
	rm -rf procedure-code-sections

# Remove the cluster output directory (including its DONE.txt stamp) and
# the cache/clusters.rds file.
.PHONY : clean-clusters
clean-clusters :
	rm -rf out/clusters
	rm -f cache/clusters.rds

# Remove the six archives that the `compendium` target writes into
# compendium/. Nothing else in that directory is touched.
.PHONY : clean-compendium
clean-compendium :
	rm -f compendium/all-section-matches.csv.zip
	rm -f compendium/best-section-matches.csv.zip
	rm -f compendium/procedure-codes.zip
	rm -f compendium/procedure-code-sections.zip
	rm -f compendium/clusters-of-sections.zip
	rm -f compendium/field-code-analysis.zip

# Run `clean`, then delete the files directly inside cache/ (non-recursive)
# and everything under out/ (recursive, subdirectories included).
.PHONY : clobber
clobber : clean
	rm -f cache/*
	rm -rf out/*