From dd04b1b8686c52d2b2402daffc7a431b7cbb56dc Mon Sep 17 00:00:00 2001 From: Sofia Yfantidou Date: Thu, 4 May 2017 17:20:58 +0300 Subject: [PATCH] Fixes YouTube class bug with deprecated method Fixes Twitter null username result Adds independent word frequency counter to AnalyticsExtractor --- resources/gay.txt | 3 ++ resources/jones.txt | 4 +++ resources/sharapova.txt | 11 ++++++++ src/analytics/AnalyticsExtractor.java | 34 ++++++++++++++++++----- src/combiner/Combiner.java | 16 +++++------ src/twitter/manager/TweetManager.java | 4 +-- src/youtube/YTCommentsCollector.java | 40 +++++++++++++++++++-------- src/youtube/YoutubeExporter.java | 2 +- 8 files changed, 85 insertions(+), 29 deletions(-) create mode 100644 resources/gay.txt create mode 100644 resources/jones.txt create mode 100644 resources/sharapova.txt diff --git a/resources/gay.txt b/resources/gay.txt new file mode 100644 index 0000000..49c736e --- /dev/null +++ b/resources/gay.txt @@ -0,0 +1,3 @@ +6lYivLjHjKs +Z8y0Sb3EUag +tjfl4IGR7VA diff --git a/resources/jones.txt b/resources/jones.txt new file mode 100644 index 0000000..061b86d --- /dev/null +++ b/resources/jones.txt @@ -0,0 +1,4 @@ +DkQpTdVK1cc +H3JmeHKFEZU +y1USv6QL-Dc +rWyGWu186zY diff --git a/resources/sharapova.txt b/resources/sharapova.txt new file mode 100644 index 0000000..65f8180 --- /dev/null +++ b/resources/sharapova.txt @@ -0,0 +1,11 @@ +pyeAbGKx8JA +R3iaTN3Q3Zw +80lfHwmkGNA +1s4_mppy6p8 +_u5Jkaho3Qw +sn6SdpXtoSg +6YSVjWDTuPY +u1fxX-WT5FQ +PTgpfwDRGvQ +jDtvAFrzxhM +6yj53Q53cqY diff --git a/src/analytics/AnalyticsExtractor.java b/src/analytics/AnalyticsExtractor.java index 90c2ca3..ee6093e 100644 --- a/src/analytics/AnalyticsExtractor.java +++ b/src/analytics/AnalyticsExtractor.java @@ -38,14 +38,23 @@ private void getTwitterMentionFrequencies() { writeToTagcloudFile(calculateFrequencies("mentions","twitter"),"mentions_frequencies_twitter.txt"); } + /** + * Gets the word frequencies in collected tweets and comments + */ + private void getWordFrequencies() { + writeToTagcloudFile(calculateFrequencies("parsedString","twitter"),"word_frequencies_twitter.txt"); + writeToTagcloudFile(calculateFrequencies("parsedString","youtube"),"word_frequencies_youtube.txt"); + } + /** * Gets the location frequencies in collected tweets and Youtube comments */ private void getLocationFrequencies() { writeToTagcloudFile(getCountriesFrequencies(calculateFrequenciesSimple("geo","twitter")),"location_frequencies_twitter.txt"); - writeToTagcloudFile(getCountriesFrequencies(calculateFrequenciesSimple("location","youtube")),"location_frequencies_youtube.txt"); + //writeToTagcloudFile(getCountriesFrequencies(calculateFrequenciesSimple("location","youtube")),"location_frequencies_youtube.txt"); } + private HashMap getCountriesFrequencies(HashMap locations) { HashMap countriesFrequencies = new HashMap<>(); for(String location : locations.keySet()) { @@ -81,10 +90,11 @@ private void getTwitterUsersFrequencies() { * Gets the youtube users frequencies in collected Youtube comments */ private void getYoutubeUsersFrequencies() { - writeToTagcloudFile(calculateFrequenciesSimple("authorID","youtube"),"user_frequencies_youtube.txt"); + writeToTagcloudFile(calculateFrequenciesSimple("authorName","youtube"),"user_frequencies_youtube.txt"); } + /** * Calculates frequencies of field by hashing, without preprocessing * @param field The field, which frequency is counted @@ -124,10 +134,18 @@ private HashMap calculateFrequenciesSimple(String field, String private HashMap calculateFrequencies(String field, String medium) { HashMap frequencies = new HashMap<>(); HashMap tweets_comments; - if(medium.equals("twitter")) { - tweets_comments = mc.getTweets(); + if(field.compareTo("parsedString") != 0) { + if (medium.equals("twitter")) { + tweets_comments = mc.getTweets(); + } else { + tweets_comments = mc.getComments(); + } } else { - tweets_comments = mc.getComments(); + if (medium.equals("twitter")) { + tweets_comments = mc.getFullTweets(); + } else { + tweets_comments = mc.getFullComments(); + } } for(JSONObject tweet_comment : tweets_comments.values()) { @@ -137,8 +155,9 @@ private HashMap calculateFrequencies(String field, String medium } catch (JSONException e) { e.printStackTrace(); } - if(!fieldValues[0].isEmpty()) { - for (String fieldValue : fieldValues) { + + for (String fieldValue : fieldValues) { + if(!fieldValue.isEmpty()) { frequencies.putIfAbsent(fieldValue, 0); frequencies.computeIfPresent(fieldValue, (k, v) -> v + 1); } @@ -176,5 +195,6 @@ public void analyze() { getDateFrequencies(); getTwitterUsersFrequencies(); getYoutubeUsersFrequencies(); + getWordFrequencies(); } } diff --git a/src/combiner/Combiner.java b/src/combiner/Combiner.java index 1cee4c6..b7a560f 100644 --- a/src/combiner/Combiner.java +++ b/src/combiner/Combiner.java @@ -29,22 +29,22 @@ public static void main(String[] args) throws JSONException, IOException { if(args != null) { - /*// Collect Youtube comments and insert them to DB - YoutubeExporter.main(args); - Preprocessor.preprocessComments(args[4].split("=")[1], fc); + // Collect Youtube comments and insert them to DB + /*YoutubeExporter.main(args); + Preprocessor.preprocessComments(args[4].split("=")[1], fc);*/ // Collect tweets and insert them to DB - TwitterExporter.main(args); - Preprocessor.preprocessTweets(args[4].split("=")[1], fc); + /*TwitterExporter.main(args); + Preprocessor.preprocessTweets(args[4].split("=")[1], fc);*/ - fc.exportFrequencies(); //creates frequencies.txt - sorted alphabetically - fc.exportFrequenciesByValue(); //creates frequenciesByValue.txt - sorted by frequencies (descending order) + /*fc.exportFrequencies(); //creates frequencies.txt - sorted alphabetically + fc.exportFrequenciesByValue(); //creates frequenciesByValue.txt - sorted by frequencies (descending order)*/ if(args.length >= 5) { analyticsExtractor = new AnalyticsExtractor(args[4].split("=")[1]); analyticsExtractor.analyze(); - }*/ + } Analysis analysis = new Analysis(args[4].split("=")[1]); analysis.SentimentAnalysis(); diff --git a/src/twitter/manager/TweetManager.java b/src/twitter/manager/TweetManager.java index 57d5b49..c216374 100644 --- a/src/twitter/manager/TweetManager.java +++ b/src/twitter/manager/TweetManager.java @@ -74,7 +74,7 @@ private static String getURLResponse(String username, String since, String until * @return A list of all tweets found */ public static List getTweets(TwitterCriteria criteria) { - List results = new ArrayList(); + List results = new ArrayList<>(); try { String refreshCursor = null; @@ -89,7 +89,7 @@ public static List getTweets(TwitterCriteria criteria) { } for (Element tweet : tweets) { - String usernameTweet = tweet.select("span.username.js-action-profile-name b").text(); + String usernameTweet = tweet.select("span.username").text(); String txt = tweet.select("p.js-tweet-text").text().replaceAll("[^\\u0000-\\uFFFF]", ""); int retweets = Integer.valueOf(tweet.select("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replaceAll(",", "")); int favorites = Integer.valueOf(tweet.select("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replaceAll(",", "")); diff --git a/src/youtube/YTCommentsCollector.java b/src/youtube/YTCommentsCollector.java index fc94a09..2d0580e 100644 --- a/src/youtube/YTCommentsCollector.java +++ b/src/youtube/YTCommentsCollector.java @@ -66,8 +66,15 @@ public ArrayList collectComments(String filenameArgument) { CommentSnippet snippet = videoComment.getSnippet().getTopLevelComment().getSnippet(); String comment = snippet.getTextDisplay(); String authorID = snippet.getAuthorChannelId().toString(); + String authorName = snippet.getAuthorDisplayName(); + + JsonObject jsonObject = createJSONfor(comment,authorID,authorName); + jsons.add(jsonObject); + + + // DEPRECATED + /*String userURL = snippet.getAuthorGoogleplusProfileUrl(); - String userURL = snippet.getAuthorGoogleplusProfileUrl(); if(userURL!=null){ //if the user still exists String[] splitted = userURL.split("/"); @@ -82,8 +89,7 @@ public ArrayList collectComments(String filenameArgument) { JsonObject jsonObject = createJSONfor(comment,authorID,userDetails[0],userDetails[1],userDetails[2]); jsons.add(jsonObject); - - } + }*/ @@ -132,8 +138,6 @@ private String readAPIKey(){ String key = bufferedReader.readLine(); //key is in the string in the first line of the API_KEY.txt file bufferedReader.close(); return key; - } catch (FileNotFoundException e) { - e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } @@ -148,7 +152,7 @@ private String readAPIKey(){ */ private String[] getDetailsFromJSON(HttpEntity entity){ String[] details = new String[3]; - String retSrc = null; + String retSrc; if(entity!=null) { //null => user does not exist @@ -183,9 +187,7 @@ private String[] getDetailsFromJSON(HttpEntity entity){ details[2] = "-"; } - } catch (IOException e) { - e.printStackTrace(); - } catch (JSONException e) { + } catch (IOException | JSONException e) { e.printStackTrace(); } @@ -214,8 +216,6 @@ private ArrayList fetchVideoIds(String filenameArgument){ while ((line = br.readLine()) != null) { videoIDs.add(line); //adds the videoID in the list } - } catch (FileNotFoundException e) { - e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } @@ -237,4 +237,22 @@ private JsonObject createJSONfor(String comment,String authorID,String gender,St return jsonObject; } + /** + * Builds JSON object for non deprecated attributes + * @param comment The comment's text + * @param authorID The author's ID + * @param name The author's YouTube username + * @return JSON object containing the above information + */ + private JsonObject createJSONfor(String comment,String authorID,String name){ + + JsonObject jsonObject = factory.createObjectBuilder() + .add("comment", comment) + .add("authorID",authorID) + .add("authorName",name) + .build(); + + return jsonObject; + } + } diff --git a/src/youtube/YoutubeExporter.java b/src/youtube/YoutubeExporter.java index eddeb8a..b2cfe44 100644 --- a/src/youtube/YoutubeExporter.java +++ b/src/youtube/YoutubeExporter.java @@ -16,7 +16,7 @@ public static void main(String args[]){ for(JsonObject json : jsons){ String comment = json.getString("comment"); //gets the comment field //String preprocessed = Preprocessor.preprocessComment(comment); - //PREPROCESSED TEXT SHOULD BE SAVEN INSIDE MONGOCONNECTOR (SOFIA) + //PREPROCESSED TEXT SHOULD BE SAVED INSIDE MONGOCONNECTOR (SOFIA) } for (String parameter : args) {