-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathRio
executable file
·139 lines (123 loc) · 3.19 KB
/
Rio
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#!/bin/bash
# Rio: Load CSV from stdin into R as a data.frame, execute given commands, and get the output as CSV or PNG on stdout
#
# Example usage:
# $ seq 100 | Rio -nf sum (same as: seq 100 | Rio -ne 'sum(df)')
#
# $ curl -s 'https://raw.githubusercontent.com/pydata/pandas/master/pandas/tests/data/iris.csv' > iris.csv
# $ < iris.csv Rio -e 'df$SepalLength^2'
# $ < iris.csv Rio -f summary
# $ < iris.csv Rio -se 'sqldf("select Name from df where df.SepalLength > 7")'
# $ < iris.csv Rio -ge 'g+geom_point(aes(x=SepalLength,y=SepalWidth,colour=Name))' > iris.png
#
# Dependency: R (optionally with the R packages ggplot2, dplyr, tidyr, sqldf, and ggmap)
#
# Author: http://jeroenjanssens.com
#######################################
# Print Rio's help text.
# Arguments: none
# Outputs:   the option summary on stdout, one entry per line
#######################################
usage() {
  local -a help_text=(
    'Rio: Load CSV from stdin into R as a data.frame, execute given commands, and get the output as CSV on stdout'
    'usage: Rio OPTIONS'
    'OPTIONS:'
    '-d Delimiter'
    '-e Commands to execute'
    '-f Single command to execute on data.frame'
    '-g Import ggplot2'
    '-h Show this message'
    '-m Import ggmap'
    '-n CSV has no header'
    '-r Import dplyr and tidyr'
    '-s Import sqldf'
    '-b Use same settings as used for book Data Science at the Command Line'
    '-v Verbose'
  )
  printf '%s\n' "${help_text[@]}"
}
#######################################
# Exit handler: delete the temporary files used by this run.
# Globals:   IN, OUT, ERR (read) - temp file paths; any of them may still be
#            unset if the script exits early.
# Arguments: none
# Notes:     a non-empty error log ($ERR) is deliberately left on disk; only
#            an empty or missing one is removed.
#######################################
finish() {
  local path
  # "${OUT%.png}" and "${ERR%.err}" are the same paths without their suffix.
  # Every expansion is quoted so a temp directory containing spaces or glob
  # characters is still cleaned up; empty (unset) paths are skipped.
  for path in "$IN" "$OUT" "${OUT%.png}" "${ERR%.err}"; do
    if [[ -n "$path" ]]; then
      rm -f -- "$path"
    fi
  done
  ## Removes error file if error file is empty.
  if [[ -n "$ERR" && ! -s "$ERR" ]]; then
    rm -f -- "$ERR"
  fi
  # Presumably the default plot file R drops in the working directory.
  rm -f Rplots.pdf
}
# Run the cleanup on every exit path.
trap finish EXIT
#######################################
# Build the R program for this run and execute it with Rscript.
# Globals (read): IN, OUT, HEADER, DELIMITER, REQUIRES, SCRIPT,
#                 RIO_DPI (72 is used when it is unset)
# Outputs:  whatever Rscript prints; the result itself is written to $OUT
# Returns:  the exit status of Rscript
# Note:     all values are pasted into the R source as-is (no escaping).
#######################################
callR() {
  local r_code

  # Load the CSV into the data.frame `df`.
  r_code="options(scipen=999);"
  r_code+="df<-read.csv('${IN}',header=${HEADER},sep='${DELIMITER}',stringsAsFactors=F);"

  # Optional package imports, then the user's code; keep its last value.
  r_code+="${REQUIRES}${SCRIPT}"
  r_code+="last<-.Last.value;"

  # Choose how to write the result based on its type.
  r_code+="if(is.matrix(last)){last<-as.data.frame(last)};"
  r_code+="if(is.data.frame(last)){write.table(last,'${OUT}',sep=',',quote=T,qmethod='double',row.names=F,col.names=${HEADER});}"
  # Inside double quotes the shell turns '\\\n' into the three characters \\n.
  r_code+="else if(is.vector(last)){cat(last,sep='\\\n', file='${OUT}')}"
  r_code+="else if(exists('is.ggplot')&&is.ggplot(last)){ggsave('${OUT}',last,dpi=${RIO_DPI-72},units='cm',width=20,height=15);}"
  r_code+="else{sink('${OUT}');print(last);}"

  Rscript --vanilla -e "${r_code}"
}
# Default settings; the command-line options may override them.
SCRIPT=         # R code to run on the data.frame
REQUIRES=       # R statements that load optional packages
DELIMITER=","   # field separator of the input CSV
HEADER="T"      # R logical: does the input have a header row?
VERBOSE=false   # true: let R print straight to the terminal

# macOS `mktemp' requires a temp file template, while Linux `mktemp' treats it
# as optional. Passing a template explicitly works for both. $TMPDIR falls
# back to /tmp when it is not set as an environment variable.
if [[ -z "${TMPDIR:-}" ]]; then
  TMPDIR=/tmp/
fi

# mktemp creates three suffix-less files; OUT and ERR then get ".png" / ".err"
# appended to the generated name. The template is quoted so a TMPDIR that
# contains spaces works, and a trailing slash on TMPDIR is not doubled.
if ! {
  IN=$(mktemp "${TMPDIR%/}/Rio-XXXXXXXX") &&
    OUT=$(mktemp "${TMPDIR%/}/Rio-XXXXXXXX") &&
    ERR=$(mktemp "${TMPDIR%/}/Rio-XXXXXXXX")
}; then
  printf 'Rio: cannot create temporary files in %s\n' "$TMPDIR" >&2
  exit 1
fi
OUT="${OUT}.png"
ERR="${ERR}.err"
#######################################
# Parse Rio's command-line options into the global settings.
# Globals:   DELIMITER, HEADER, REQUIRES, RIO_DPI, SCRIPT, VERBOSE (written)
# Arguments: the command-line arguments of the script
# Outputs:   the usage text (stdout for -h, stderr for an invalid option)
# Returns:   0 normally; exits the script with status 1 after -h or on an
#            invalid option
#######################################
parse_options() {
  local opt
  # Matches code that already ends in ';' (optionally followed by whitespace).
  local terminated_re=';[[:space:]]*$'

  while getopts "d:hgmnrsve:f:b" opt; do
    case "$opt" in
      b)
        RIO_DPI=300
        ;;
      d)
        DELIMITER=$OPTARG
        ;;
      e)
        SCRIPT=$OPTARG
        # More R code is appended directly after SCRIPT, so make sure it ends
        # with ';'. The check is done on the quoted value, so glob characters
        # or a leading '-' in the user's code cannot influence it.
        if [[ ! "$SCRIPT" =~ $terminated_re ]]; then
          SCRIPT="${SCRIPT};"
        fi
        ;;
      f)
        # Apply a single function to the whole data.frame.
        SCRIPT="${OPTARG}(df);"
        ;;
      h)
        usage
        exit 1
        ;;
      g)
        REQUIRES="${REQUIRES}require(ggplot2);g<-ggplot(df);"
        ;;
      n)
        HEADER="F"
        ;;
      r)
        REQUIRES="${REQUIRES}require(dplyr);require(tidyr);"
        ;;
      s)
        REQUIRES="${REQUIRES}require(sqldf);"
        ;;
      m)
        REQUIRES="${REQUIRES}require(ggmap);"
        ;;
      v)
        VERBOSE=true
        ;;
      *)
        # Unknown option or missing argument: getopts has already printed a
        # diagnostic; show the usage on stderr and fail.
        usage >&2
        exit 1
        ;;
    esac
  done
}
parse_options "$@"
# Buffer all of stdin in the temporary input file that the R code reads.
cat > "$IN"

# Run R. Unless -v was given, everything R prints (stdout and stderr) is
# captured in $ERR instead of being shown.
if [[ "$VERBOSE" == true ]]; then
  callR
  r_status=$?
else
  callR > "$ERR" 2>&1
  r_status=$?
fi

if [[ ! -f "$OUT" ]]; then
  # R did not write a result file: show the captured log. In verbose mode
  # $ERR was never created and R's messages have already been printed.
  if [[ -f "$ERR" ]]; then
    cat -- "$ERR"
  fi
  # Report the failure to the caller instead of exiting 0.
  if (( r_status == 0 )); then
    r_status=1
  fi
  exit "$r_status"
fi

# NUL bytes (e.g. in binary image output) are mapped to newlines because a
# shell variable cannot hold them.
RESULT="$(tr '\0' '\n' < "$OUT")"
if [[ "$RESULT" == "NULL" ]]; then
  # The R code evaluated to NULL; what it printed along the way is the
  # useful output, and that was captured in $ERR.
  if [[ -f "$ERR" ]]; then
    cat -- "$ERR"
  fi
else
  cat -- "$OUT"
fi