From 12515eb8ff9c1d9a8c0aab7ec70b6d1dd746ec69 Mon Sep 17 00:00:00 2001 From: Gabor Szarnyas Date: Thu, 13 May 2021 11:19:43 +0200 Subject: [PATCH] Remove DBToaster implementation --- dbtoaster/README.md | 47 ---------- dbtoaster/bi-query-util.sh | 33 ------- dbtoaster/data/.gitignore | 4 - dbtoaster/prepare-data.sh | 42 --------- dbtoaster/queries/bi-1.sql | 33 ------- dbtoaster/queries/bi-12.sql | 33 ------- dbtoaster/queries/bi-17.sql | 58 ------------ dbtoaster/queries/bi-2.sql | 60 ------------ dbtoaster/queries/bi-23.sql | 24 ----- dbtoaster/queries/bi-3.sql | 46 --------- dbtoaster/queries/bi-4.sql | 33 ------- dbtoaster/queries/bi-9.sql | 49 ---------- dbtoaster/sandbox.sql | 74 --------------- dbtoaster/schema.sql | 180 ------------------------------------ 14 files changed, 716 deletions(-) delete mode 100644 dbtoaster/README.md delete mode 100755 dbtoaster/bi-query-util.sh delete mode 100644 dbtoaster/data/.gitignore delete mode 100755 dbtoaster/prepare-data.sh delete mode 100644 dbtoaster/queries/bi-1.sql delete mode 100644 dbtoaster/queries/bi-12.sql delete mode 100644 dbtoaster/queries/bi-17.sql delete mode 100644 dbtoaster/queries/bi-2.sql delete mode 100644 dbtoaster/queries/bi-23.sql delete mode 100644 dbtoaster/queries/bi-3.sql delete mode 100644 dbtoaster/queries/bi-4.sql delete mode 100644 dbtoaster/queries/bi-9.sql delete mode 100644 dbtoaster/sandbox.sql delete mode 100644 dbtoaster/schema.sql diff --git a/dbtoaster/README.md b/dbtoaster/README.md deleted file mode 100644 index 42ca10304..000000000 --- a/dbtoaster/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# DBToaster experimental implementation - -This folder holds some experimental, preliminary query implementations -for the [DBToaster] SQL interface - - -## Preparation - - 1. place CSV files of **merge foreign key flavour** generated by the datagen in the `data/raw` directory - 2. prepare CSV files for DBToaster by running `prepare-data.sh` from the LDBC SNB DBToaster implementation directory - -Note: `prepare-data.sh` accepts directory names as parameters to read and write to other directories: - - 1. the input directory, where the datagen's CSV files reside. Defaults to `./data/raw` - 2. the output data directory, where output files are placed. Defaults to `./data` - -## Running BI queries - -The `bi-query-util.sh` is provided for convenience to run a single BI query through DBToaster after loading the schema and data. - -This by default assumes that DBToaster binary if located at `$HOME/bin/dbtoaster/bin/dbtoaster`. -To override DBToaster path, set the `DBTOASTER_BIN` environment variable pointing to the DBToaster binary. - -Example: - -```shell -user@machine:~/git/ldbc_snb_implementations/dbtoaster$ ./bi-query-util.sh 1 -``` - -## Generate Scala JAR - -Again, the `bi-query-util.sh` can be used to generate Scala JAR for the given BI query. - -Simply provide the generator script with `jar` in the 2nd parameter to generate `ldbc-bi-q$n.jar` under the `target/` directory, where `$n` is the number of the BI query. - -Example: - -```shell -user@machine:~/git/ldbc_snb_implementations/dbtoaster$ ./bi-query-util.sh 1 jar -``` - -A few things to know about the generated query class: - - - the query name is `biQ$n`, again, `$n` stands for the query number - - the query looks for the input CSV files in the `data/` subdirectory of the current working dir - -[DBToaster]: https://dbtoaster.github.io/ diff --git a/dbtoaster/bi-query-util.sh b/dbtoaster/bi-query-util.sh deleted file mode 100755 index 840822d9e..000000000 --- a/dbtoaster/bi-query-util.sh +++ /dev/null @@ -1,33 +0,0 @@ -#!/bin/bash - -# exit upon non-zero pipe (roughly: command) status -set -e - -QUERY_NUMBER=$1 -ACTION=${2:-'run'} # supply jar to generate JAR file -QUERY_SET=${3:-'bi'} # give the query set name, defaults to bi -DBTOASTER_BIN=${DBTOASTER_BIN:-$HOME/bin/dbtoaster/bin/dbtoaster} - -if [ "${QUERY_NUMBER}x" == "x" ]; then - echo 1>&2 "ERROR: No query number was provided. Provide ${QUERY_SET} query number as first argument, e.g. '$0 1'" - exit -2 -fi - -TMP_SQL_SCRIPT=$(mktemp) - -cat >$TMP_SQL_SCRIPT < \ - $DATA_DIR/message_0_0.csv -cat $RAW_DIR/comment_0_0.csv | \ - awk -F '|' '{print $1"||"$2"|"$3"|"$4"||"$5"|"$6"|"$7"|"$8"||"$9 $10}' | \ - sed -n -e '2,$ p' >> \ - $DATA_DIR/message_0_0.csv - -# files that needs no specific transformations are copied -for f in forum forum_hasMember_person forum_hasTag_tag \ - organisation \ - person person_email_emailaddress person_hasInterest_tag person_knows_person \ - person_likes_post person_speaks_language \ - person_studyAt_organisation person_workAt_organisation \ - post_hasTag_tag tag tagclass \ - place; do - cp $RAW_DIR/$f$FILE_NAME_SUFFIX $DATA_DIR/$f$FILE_NAME_SUFFIX -done - - -for f in $DATA_DIR/*$FILE_NAME_SUFFIX ; do - # skip header row - sed -i -n -e '2,$ p' $f - - # replace empty (NULL) fields by -1 - perl -ne 's/\|(?=(\||$))/|-1/g; print;' $f >$f.new - mv $f.new $f - - # remove time component from date fields - sed -i -e 's/\(|\|^\)\([0-9]\{4\}-[0-9]\{2\}-[0-9]\{2\}\)T[0-9]\{2\}:[0-9]\{2\}:[0-9]\{2\}\.[0-9]\{3\}+[0-9]\{4\}\(|\|$\)/\1\2\3/g' $f -done diff --git a/dbtoaster/queries/bi-1.sql b/dbtoaster/queries/bi-1.sql deleted file mode 100644 index de9ffaeb0..000000000 --- a/dbtoaster/queries/bi-1.sql +++ /dev/null @@ -1,33 +0,0 @@ -/* Q1. Posting summary -\set date '\'2011-07-21T22:00:00.000+00:00\''::timestamp - */ -SELECT messageYear, message_order, lengthCategory - , count(*) AS messageCount - , avg(post_prep.m_length) AS averageMessageLength - , sum(post_prep.m_length) AS sumMessageLength - , count(*) / pc.cnt AS percentageOfMessages - FROM ( - SELECT extract(year from p1.m_creationdate) AS messageYear - , CASE WHEN p1.m_c_replyof = -1 THEN 0 ELSE 1 END AS message_order -- was: IS NOT NULL AS isComment - , CASE - WHEN p1.m_length < 40 THEN 0 -- short - WHEN p1.m_length < 80 THEN 1 -- one liner - WHEN p1.m_length < 160 THEN 2 -- tweet - ELSE 3 -- long - END AS lengthCategory - , p1.m_length - FROM message p1 - WHERE 1=1 - AND p1.m_creationdate < DATE('2011-07-22') -- FIXME:param - --AND m_content IS NOT NULL - AND p1.m_ps_imagefile = '-1' -- was: IS NULL - ) post_prep - , ( - SELECT count(*) AS cnt - FROM message p2 - WHERE 1=1 - AND p2.m_creationdate < DATE('2011-07-22') -- FIXME:param - ) pc -- post_count - GROUP BY messageYear, message_order, lengthCategory, pc.cnt - --ORDER BY messageYear DESC, isComment ASC, lengthCategory ASC -; diff --git a/dbtoaster/queries/bi-12.sql b/dbtoaster/queries/bi-12.sql deleted file mode 100644 index cfd6ae138..000000000 --- a/dbtoaster/queries/bi-12.sql +++ /dev/null @@ -1,33 +0,0 @@ -/* Q12. Trending Posts -\set date '\'2011-07-22T00:00:00.000+00:00\''::timestamp -\set likeThreshold 400 - */ -SELECT result.messageId - , result.messageCreationDate - , result.creatorFirstName - , result.creatorLastName - , result.likeCount - FROM ( - SELECT m.m_messageid AS messageId - , m.m_creationdate AS messageCreationDate - , c.p_firstname AS creatorFirstName - , c.p_lastname AS creatorLastName - , count(*) as likeCount - FROM message m - , person c -- creator - , likes l - WHERE 1=1 - -- join - AND m.m_creatorid = c.p_personid - AND m.m_messageid = l.l_messageid - -- filter - AND m.m_creationdate > DATE('2011-07-22') -- FIXME:param :date - GROUP BY m.m_messageid - , m.m_creationdate - , c.p_firstname - , c.p_lastname - ) result - WHERE result.likeCount > 400 -- FIXME:param :likeThreshold - --ORDER BY likeCount DESC, messageId - --LIMIT 100 -; diff --git a/dbtoaster/queries/bi-17.sql b/dbtoaster/queries/bi-17.sql deleted file mode 100644 index b31406812..000000000 --- a/dbtoaster/queries/bi-17.sql +++ /dev/null @@ -1,58 +0,0 @@ -/* Q17. Friend triangles -\set country '\'Belarus\'' - */ -SELECT count(*) - FROM ( - SELECT p.p_personid AS personid - , k.k_person2id as friendid - FROM person p - , place ci -- city - , place co -- country - , knows k - WHERE 1=1 - -- join - AND p.p_placeid = ci.pl_placeid - AND ci.pl_containerplaceid = co.pl_placeid - AND p.p_personid = k.k_person1id - -- filter - AND co.pl_name = 'Belarus' -- FIXME:param :country - ) p1 - , ( - SELECT p.p_personid AS personid - , k.k_person2id as friendid - FROM person p - , place ci -- city - , place co -- country - , knows k - WHERE 1=1 - -- join - AND p.p_placeid = ci.pl_placeid - AND ci.pl_containerplaceid = co.pl_placeid - AND p.p_personid = k.k_person1id - -- filter - AND co.pl_name = 'Belarus' -- FIXME:param :country - ) p2 - , ( - SELECT p.p_personid AS personid - , k.k_person2id as friendid - FROM person p - , place ci -- city - , place co -- country - , knows k - WHERE 1=1 - -- join - AND p.p_placeid = ci.pl_placeid - AND ci.pl_containerplaceid = co.pl_placeid - AND p.p_personid = k.k_person1id - -- filter - AND co.pl_name = 'Belarus' -- FIXME:param :country - ) p3 - WHERE 1=1 - -- join - AND p1.friendid = p2.personid - AND p2.friendid = p3.personid - AND p3.friendid = p1.personid - -- filter: unique trinagles only - AND p1.personid < p2.personid - AND p2.personid < p3.personid -; diff --git a/dbtoaster/queries/bi-2.sql b/dbtoaster/queries/bi-2.sql deleted file mode 100644 index 8abfccc69..000000000 --- a/dbtoaster/queries/bi-2.sql +++ /dev/null @@ -1,60 +0,0 @@ -/* Q2. Top tags for country, age, gender, time -\set startDate '\'2010-01-01T00:00:00.000+00:00\''::timestamp -\set endDate '\'2010-11-08T00:00:00.000+00:00\''::timestamp -\set country1 '\'Ethiopia\'' -\set country2 '\'Belarus\'' - */ -SELECT countryName - , messageMonth - , personGender - , ageGroup - , tagName - , messageCount - FROM ( - SELECT co.pl_name AS countryName - , extract(MONTH FROM p.m_creationdate) as messageMonth - , cr.p_gender AS personGender - , CASE -- ugly hack because I was unable to figure out how to cast_int/floor an expression - /* - -- the youngest person was born in 1980, so should be of age 33 in 2013. - -- Using 30 levels os CASE-WHEN lead to the exception during query compilation -java.lang.StackOverflowError - at ddbt.codegen.LMSGen.expr(LMSGen.scala:109) - at ddbt.codegen.LMSGen$$anonfun$expr$11.apply(LMSGen.scala:106) - at ddbt.codegen.LMSGen$$anonfun$expr$11.apply(LMSGen.scala:106) - at ddbt.codegen.LMSGen.expr(LMSGen.scala:41) - */ - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 10 THEN 10 - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 9 THEN 9 - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 8 THEN 8 - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 7 THEN 7 - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 6 THEN 6 - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 5 THEN 5 - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 4 THEN 4 - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 3 THEN 3 - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 2 THEN 2 - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 1 THEN 1 - WHEN (2013 - extract(YEAR FROM cr.p_birthday))/5 >= 0 THEN 0 - END AS ageGroup - , t.t_name AS tagName - , count(*) AS messageCount - FROM message p - , message_tag pt - , tag t - , person cr -- creator - , place ci -- city - , place co -- country - WHERE 1=1 - -- join - AND p.m_messageid = pt.mt_messageid - AND pt.mt_tagid = t.t_tagid - AND p.m_creatorid = cr.p_personid - AND cr.p_placeid = ci.pl_placeid - AND ci.pl_containerplaceid = co.pl_placeid - -- filter - AND (co.pl_name = 'Ethiopia' OR co.pl_name = 'Belarus') -- FIXME:param country1, country2 - AND p.m_creationdate BETWEEN DATE('2010-01-01') AND DATE('2010-11-08') -- FIXME:param startDate, endDate - GROUP BY co.pl_name, messageMonth, cr.p_gender, t.t_name, ageGroup - ) res - WHERE res.messageCount > 100 -; diff --git a/dbtoaster/queries/bi-23.sql b/dbtoaster/queries/bi-23.sql deleted file mode 100644 index 3b0997763..000000000 --- a/dbtoaster/queries/bi-23.sql +++ /dev/null @@ -1,24 +0,0 @@ -/* Q23. Holiday destinations -\set country '\'Belarus\'' - */ -SELECT count(*) AS messageCount - , dest.pl_name AS destinationName - , extract(MONTH FROM m.m_creationdate) AS month - FROM place pco -- person country - , place pci -- person city - , person p - , message m - , place dest - WHERE 1=1 - -- join - AND pco.pl_placeid = pci.pl_containerplaceid - AND pci.pl_placeid = p.p_placeid - AND p.p_personid = m.m_creatorid - AND m.m_locationid = dest.pl_placeid - -- filter - AND pco.pl_name = 'Belarus' -- FIXME:param :country - AND m.m_locationid != pco.pl_placeid - GROUP BY dest.pl_name, month - --ORDER BY messageCount DESC, dest.pl_name, month - --LIMIT 100 -; diff --git a/dbtoaster/queries/bi-3.sql b/dbtoaster/queries/bi-3.sql deleted file mode 100644 index d518a3d02..000000000 --- a/dbtoaster/queries/bi-3.sql +++ /dev/null @@ -1,46 +0,0 @@ -/* Q3. Tag evolution -\set year 2010 -\set month 11 - */ -SELECT d.t_name as tagName - , d.countMonth1 - , d.countMonth2 - , CASE - WHEN d.countMonth1-d.countMonth2 > 0 THEN d.countMonth1-d.countMonth2 - ELSE d.countMonth2-d.countMonth1 - END AS diff - FROM ( - SELECT tab1.t_name - , sum(tab1.belongsToMonth1) AS countMonth1 - , sum(tab1.belongsToMonth2) AS countMonth2 - FROM ( - SELECT t.t_name - , CASE WHEN extract(MONTH FROM m.m_creationdate) = param.month1 THEN 1 ELSE 0 END AS belongsToMonth1 - , CASE WHEN extract(MONTH FROM m.m_creationdate) = param.month2 THEN 1 ELSE 0 END AS belongsToMonth2 - FROM message m - , message_tag mt - , tag t - , ( - SELECT param_inner.year AS year1 - , param_inner.month AS month1 - , CASE WHEN param_inner.month = 12 THEN param_inner.year + 1 ELSE param_inner.year END AS year2 - , CASE WHEN param_inner.month = 12 THEN 1 ELSE param_inner.month+1 END AS month2 - FROM ( - SELECT 2010 AS year, 11 AS month -- FIXME:param :year, :month - ) param_inner - ) param - WHERE 1=1 - -- join - AND m.m_messageid = mt.mt_messageid - AND mt.mt_tagid = t.t_tagid - -- filter - AND ( 0=1 - OR extract(YEAR FROM m.m_creationdate) = param.year1 AND extract(MONTH FROM m.m_creationdate) = param.month1 - OR extract(YEAR FROM m.m_creationdate) = param.year2 AND extract(MONTH FROM m.m_creationdate) = param.month2 - ) - ) tab1 - GROUP BY tab1.t_name - ) d -- detail - --ORDER BY diff desc, t_name - --LIMIT 100 -; diff --git a/dbtoaster/queries/bi-4.sql b/dbtoaster/queries/bi-4.sql deleted file mode 100644 index 62ff482f5..000000000 --- a/dbtoaster/queries/bi-4.sql +++ /dev/null @@ -1,33 +0,0 @@ -/* Q4. Popular topics in a country -\set tagClass '\'MusicalArtist\'' -\set country '\'Burma\'' - */ -SELECT f.f_forumid AS forumId - , f.f_title AS forumTitle - , f.f_creationdate AS forumCreationDate - , f.f_moderatorid AS personId - , count(DISTINCT p.m_messageid) AS postCount - FROM tagClass tc - , tag t - , message_tag pt - , message p - , forum f - , person m -- moderator - , place ci -- city - , place co -- country - WHERE 1=1 - -- join - AND tc.tc_tagclassid = t.t_tagclassid - AND t.t_tagid = pt.mt_tagid - AND pt.mt_messageid = p.m_messageid - AND p.m_ps_forumid = f.f_forumid - AND f.f_moderatorid = m.p_personid - AND m.p_placeid = ci.pl_placeid - AND ci.pl_containerplaceid = co.pl_placeid - -- filter - AND tc.tc_name = 'MusicalArtist' -- FIXME:param :tagClass - AND co.pl_name = 'Burma' -- FIXME:param :country - GROUP BY f.f_forumid, f.f_title, f.f_creationdate, f.f_moderatorid - --ORDER BY postCount DESC, f.f_forumid - --LIMIT 20 -; diff --git a/dbtoaster/queries/bi-9.sql b/dbtoaster/queries/bi-9.sql deleted file mode 100644 index 62431df70..000000000 --- a/dbtoaster/queries/bi-9.sql +++ /dev/null @@ -1,49 +0,0 @@ -/* Q9. Forum with related Tags -\set tagClass1 '\'BaseballPlayer\'' -\set tagClass2 '\'ChristianBishop\'' -\set threshold 200 - */ -SELECT f.f_forumid AS forumId - , count(DISTINCT p1.m_messageid) AS count1 - , count(DISTINCT p2.m_messageid) AS count2 - FROM tagclass tc1 - , tag t1 - , message_tag pt1 - , message p1 - , tagclass tc2 - , tag t2 - , message_tag pt2 - , message p2 - , forum f - , ( - SELECT fp1.forumid - FROM ( - SELECT fp0.fp_forumid AS forumid - , count(*) AS forumPersonCnt - FROM forum_person fp0 - GROUP BY fp0.fp_forumid - ) fp1 - WHERE fp1.forumPersonCnt > 200 -- FIXME:param :threshold - ) pf -- popular_forums - WHERE 1=1 - -- join - -- tagClass1 to forum - AND tc1.tc_tagclassid = t1.t_tagclassid - AND t1.t_tagid = pt1.mt_tagid - AND pt1.mt_messageid = p1.m_messageid - AND p1.m_ps_forumid = f.f_forumid - AND f.f_forumid = pf.forumid - -- tagClass2 to forum - AND tc2.tc_tagclassid = t2.t_tagclassid - AND t2.t_tagid = pt2.mt_tagid - AND pt2.mt_messageid = p2.m_messageid - AND p2.m_ps_forumid = f.f_forumid - -- filter - AND tc1.tc_name = 'BaseballPlayer' -- FIXME:param :tagClass1 - AND tc2.tc_name = 'ChristianBishop' -- FIXME:param :tagClass2 - AND p1.m_c_replyof = -1 - AND p2.m_c_replyof = -1 - GROUP BY f.f_forumid - --ORDER BY abs(count(DISTINCT p2.m_messageid) - count(DISTINCT p1.m_messageid) ) DESC, f.f_forumid - --LIMIT 100 -; diff --git a/dbtoaster/sandbox.sql b/dbtoaster/sandbox.sql deleted file mode 100644 index ab79226c3..000000000 --- a/dbtoaster/sandbox.sql +++ /dev/null @@ -1,74 +0,0 @@ -INCLUDE 'schema.sql'; - - -/* -*/ - -SELECT count(*) AS message_cnt - FROM message -; - -SELECT COUNT(*) AS forum_cnt - FROM forum -; - -SELECT COUNT(*) AS forum_person_cnt - FROM forum_person -; - -SELECT COUNT(*) AS forum_tag_cnt - FROM forum_tag -; - -SELECT COUNT(*) AS org_cnt - FROM organisation -; - -SELECT COUNT(*) AS person_cnt - FROM person -; - -SELECT COUNT(*) AS person_email_cnt - FROM person_email -; - -SELECT COUNT(*) AS person_tag_cnt - FROM person_tag -; - -SELECT COUNT(*) AS knows_cnt - FROM knows -; - -SELECT COUNT(*) AS likes_cnt - FROM likes -; - -SELECT COUNT(*) AS person_language_cnt - FROM person_language -; - -SELECT COUNT(*) AS person_university_cnt - FROM person_university -; - -SELECT COUNT(*) AS person_company_cnt - FROM person_company -; - -SELECT COUNT(*) AS place_cnt - FROM place -; - -SELECT COUNT(*) AS message_tag_cnt - FROM message_tag -; - -SELECT COUNT(*) AS tag_cnt - FROM tag -; - -SELECT COUNT(*) AS tagclass_cnt - FROM tagclass -; - diff --git a/dbtoaster/schema.sql b/dbtoaster/schema.sql deleted file mode 100644 index 9a70eef5b..000000000 --- a/dbtoaster/schema.sql +++ /dev/null @@ -1,180 +0,0 @@ -CREATE STREAM message ( - /* - * m_ps_ denotes field specific to posts - * m_c_ denotes field specific to comments - * other m_ fields are common to posts and messages - * - * Note: to distinguish between "post" and "comment" records: - * - m_c_replyof IS NULL for all "post" records - * - m_c_replyof IS NOT NULL for all "comment" records - */ - m_messageid int, - m_ps_imagefile varchar, - m_creationdate date, - m_locationip varchar, - m_browserused varchar, - m_ps_language varchar, - m_content varchar, - m_length int, - m_creatorid int, - m_locationid int, - m_ps_forumid int, - m_c_replyof int -) -FROM FILE 'data/message_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM forum ( - f_forumid int, - f_title varchar, - f_creationdate date, - f_moderatorid int -) -FROM FILE 'data/forum_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM forum_person ( - fp_forumid int, - fp_personid int, - fp_joindate date -) -FROM FILE 'data/forum_hasMember_person_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM forum_tag ( - ft_forumid int, - ft_tagid int -) -FROM FILE 'data/forum_hasTag_tag_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM organisation ( - o_organisationid int, - o_type varchar, - o_name varchar, - o_url varchar, - o_placeid int -) -FROM FILE 'data/organisation_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM person ( - p_personid int, - p_firstname varchar, - p_lastname varchar, - p_gender varchar, - p_birthday date, - p_creationdate date, - p_locationip varchar, - p_browserused varchar, - p_placeid int -) -FROM FILE 'data/person_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; ---TODO: Add p_country int - -CREATE STREAM person_email ( - pe_personid int, - pe_email varchar -) -FROM FILE 'data/person_email_emailaddress_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - - -CREATE STREAM person_tag ( - pt_personid int, - pt_tagid int -) -FROM FILE 'data/person_hasInterest_tag_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM knows ( - k_person1id int, - k_person2id int, - k_creationdate date -) -FROM FILE 'data/person_knows_person_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM likes ( - l_personid int, - l_messageid int, - l_creationdate date -) -FROM FILE 'data/person_likes_post_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM person_language ( - plang_personid int, - plang_language varchar -) -FROM FILE 'data/person_speaks_language_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM person_university ( - pu_personid int, - pu_organisationid int, - pu_classyear int -) -FROM FILE 'data/person_studyAt_organisation_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM person_company ( - pc_personid int, - pc_organisationid int, - pc_workfrom int -) -FROM FILE 'data/person_workAt_organisation_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM place ( - pl_placeid int, - pl_name varchar, - pl_url varchar, - pl_type varchar, - pl_containerplaceid int -) -FROM FILE 'data/place_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM message_tag ( - mt_messageid int, - mt_tagid int -) -FROM FILE 'data/post_hasTag_tag_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM tagclass ( - tc_tagclassid int, - tc_name varchar, - tc_url varchar, - tc_subclassoftagclassid int -) -FROM FILE 'data/tagclass_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -; - -CREATE STREAM tag ( - t_tagid int, - t_name varchar, - t_url varchar, - t_tagclassid int -) -FROM FILE 'data/tag_0_0.csv' -LINE DELIMITED CSV (delimiter := '|') -;