-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathETL_COURSE_Lab1.txt
84 lines (64 loc) · 2.77 KB
/
ETL_COURSE_Lab1.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#We use command echo to write a string and cut to extract data
#(from a file or from a string)
echo "database" | cut -c1-4
echo "database" | cut -c5-8
#Non-contiguous characters can be extracted using the comma.
echo "database" | cut -c1,5
#We can extract a specific column/field from a delimited text file
cut -d":" -f1 /etc/passwd
cut -d":" -f1,3,6 /etc/passwd
cut -d":" -f3-6 /etc/passwd
#tr is a filter command used to translate, squeeze,
#and/or delete characters.
echo "Shell Scripting" | tr "[a-z]" "[A-Z]"
echo "Shell Scripting" | tr "[:lower:]" "[:upper:]"
echo "Shell Scripting" | tr "[A-Z]" "[a-z]"
#The -s option replaces a
#sequence of a repeated characters with a single occurrence of that character
ps | tr -s " "
#We can delete specified characters using the -d option.
echo "My login pin is 5634" | tr -d "[:digit:]"
#We use start_postgres command to start a postgresSQL database.
#We use \c command to connect to a database
#We use \q command to quit the Postgres plsql
#Objectives of the exercice 5 in Lab 1
# This script
# Extracts data from /etc/passwd file into a CSV file.
# The csv data file contains the user name, user id and
# home directory of each user account defined in /etc/passwd
# Transforms the text delimiter from ":" to ",".
# Loads the data from the CSV file into a table in PostgreSQL database.
#Exercices 5 solutions
# Extract phase
echo "Extracting data"
# Extract the columns 1 (user name), 2 (user id) and
# 6 (home directory path) from /etc/passwd
cut -d":" -f1,3,6 /etc/passwd
#Redirect the extracted data into a file named extracted-data.txt
cut -d":" -f1,3,6 /etc/passwd > extracted-data.txt
#To verify that the extracted-data.txt file existed
cat extracted-data.txt
#Transform the data into CSV format
#Note that the extracted-data.txt is delimited by ":"
We need to change this delimiter by ",".
# Transform phase
echo "Transforming data"
# read the extracted data and replace the colons with commas.
tr ":" "," < extracted-data.txt
#Load the data to a csv file
tr ":" "," < extracted-data.txt > transformed-data.csv
Verify that the csv file has beed created.
cat transformed-data.csv
#Load phase
echo "Loading data"
# Send the instructions to connect to 'template1' and
# copy the file to the table 'users' through command pipeline.
echo "\c template1;\COPY users
FROM '/home/project/transformed-data.csv' DELIMITERS ',' CSV;" | psql --username=postgres --host=localhost
To query the table users
echo '\c template1; \\SELECT * from users;' | psql --username=postgres --host=localhost
# We use wget command to scrap data from URL
wget "link"
#We use the gunzip command to unzip file
# Unzip the file to extract the .txt file.
gunzip -f web-server-access-log.txt.gz