Skip to content

Commit

Permalink
Fixes YouTube class bug with deprecated method
Browse files Browse the repository at this point in the history
Fixes Twitter null username result
Adds independent word frequency counter to AnalyticsExtractor
  • Loading branch information
syfantid committed May 4, 2017
1 parent 2c1823e commit dd04b1b
Show file tree
Hide file tree
Showing 8 changed files with 85 additions and 29 deletions.
3 changes: 3 additions & 0 deletions resources/gay.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
6lYivLjHjKs
Z8y0Sb3EUag
tjfl4IGR7VA
4 changes: 4 additions & 0 deletions resources/jones.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
DkQpTdVK1cc
H3JmeHKFEZU
y1USv6QL-Dc
rWyGWu186zY
11 changes: 11 additions & 0 deletions resources/sharapova.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
pyeAbGKx8JA
R3iaTN3Q3Zw
80lfHwmkGNA
1s4_mppy6p8
_u5Jkaho3Qw
sn6SdpXtoSg
6YSVjWDTuPY
u1fxX-WT5FQ
PTgpfwDRGvQ
jDtvAFrzxhM
6yj53Q53cqY
34 changes: 27 additions & 7 deletions src/analytics/AnalyticsExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,23 @@ private void getTwitterMentionFrequencies() {
writeToTagcloudFile(calculateFrequencies("mentions","twitter"),"mentions_frequencies_twitter.txt");
}

/**
 * Produces the word-frequency tag cloud inputs for both media.
 * Frequencies of the "parsedString" field are calculated first for tweets and
 * then for YouTube comments; each result is handed to writeToTagcloudFile
 * under its own file name.
 */
private void getWordFrequencies() {
    // Twitter first, so the call order matches the original implementation.
    HashMap<String,Integer> twitterWordCounts = calculateFrequencies("parsedString","twitter");
    writeToTagcloudFile(twitterWordCounts,"word_frequencies_twitter.txt");

    // Then the same field for YouTube comments.
    HashMap<String,Integer> youtubeWordCounts = calculateFrequencies("parsedString","youtube");
    writeToTagcloudFile(youtubeWordCounts,"word_frequencies_youtube.txt");
}

/**
* Gets the location frequencies in collected tweets and Youtube comments.
* For each medium the raw field values are counted with calculateFrequenciesSimple
* ("geo" for Twitter, "location" for Youtube), passed through getCountriesFrequencies,
* and the result is handed to writeToTagcloudFile under a per-medium file name.
*/
private void getLocationFrequencies() {
// Twitter: counts of the "geo" field.
writeToTagcloudFile(getCountriesFrequencies(calculateFrequenciesSimple("geo","twitter")),"location_frequencies_twitter.txt");
// Youtube: counts of the "location" field.
writeToTagcloudFile(getCountriesFrequencies(calculateFrequenciesSimple("location","youtube")),"location_frequencies_youtube.txt");
// Disabled copy of the Youtube call above, kept as-is.
//writeToTagcloudFile(getCountriesFrequencies(calculateFrequenciesSimple("location","youtube")),"location_frequencies_youtube.txt");
}


private HashMap<String,Integer> getCountriesFrequencies(HashMap<String,Integer> locations) {
HashMap<String, Integer> countriesFrequencies = new HashMap<>();
for(String location : locations.keySet()) {
Expand Down Expand Up @@ -81,10 +90,11 @@ private void getTwitterUsersFrequencies() {
* Gets the youtube users frequencies in collected Youtube comments
*/
private void getYoutubeUsersFrequencies() {
// Frequencies keyed by the "authorID" field of Youtube comments.
writeToTagcloudFile(calculateFrequenciesSimple("authorID","youtube"),"user_frequencies_youtube.txt");
// Frequencies keyed by the "authorName" field of Youtube comments.
// NOTE(review): both calls use the same file name "user_frequencies_youtube.txt";
// if writeToTagcloudFile overwrites rather than appends, only this second result
// remains on disk -- confirm which of the two calls is intended.
writeToTagcloudFile(calculateFrequenciesSimple("authorName","youtube"),"user_frequencies_youtube.txt");
}



/**
* Calculates frequencies of field by hashing, without preprocessing
* @param field The field whose frequency is counted
Expand Down Expand Up @@ -124,10 +134,18 @@ private HashMap<String,Integer> calculateFrequenciesSimple(String field, String
private HashMap<String,Integer> calculateFrequencies(String field, String medium) {
HashMap<String,Integer> frequencies = new HashMap<>();
HashMap<ObjectId,JSONObject> tweets_comments;
if(medium.equals("twitter")) {
tweets_comments = mc.getTweets();
if(field.compareTo("parsedString") != 0) {
if (medium.equals("twitter")) {
tweets_comments = mc.getTweets();
} else {
tweets_comments = mc.getComments();
}
} else {
tweets_comments = mc.getComments();
if (medium.equals("twitter")) {
tweets_comments = mc.getFullTweets();
} else {
tweets_comments = mc.getFullComments();
}
}

for(JSONObject tweet_comment : tweets_comments.values()) {
Expand All @@ -137,8 +155,9 @@ private HashMap<String,Integer> calculateFrequencies(String field, String medium
} catch (JSONException e) {
e.printStackTrace();
}
if(!fieldValues[0].isEmpty()) {
for (String fieldValue : fieldValues) {

for (String fieldValue : fieldValues) {
if(!fieldValue.isEmpty()) {
frequencies.putIfAbsent(fieldValue, 0);
frequencies.computeIfPresent(fieldValue, (k, v) -> v + 1);
}
Expand Down Expand Up @@ -176,5 +195,6 @@ public void analyze() {
getDateFrequencies();
getTwitterUsersFrequencies();
getYoutubeUsersFrequencies();
getWordFrequencies();
}
}
16 changes: 8 additions & 8 deletions src/combiner/Combiner.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,22 +29,22 @@ public static void main(String[] args) throws JSONException, IOException {


if(args != null) {
/*// Collect Youtube comments and insert them to DB
YoutubeExporter.main(args);
Preprocessor.preprocessComments(args[4].split("=")[1], fc);
// Collect Youtube comments and insert them to DB
/*YoutubeExporter.main(args);
Preprocessor.preprocessComments(args[4].split("=")[1], fc);*/


// Collect tweets and insert them to DB
TwitterExporter.main(args);
Preprocessor.preprocessTweets(args[4].split("=")[1], fc);
/*TwitterExporter.main(args);
Preprocessor.preprocessTweets(args[4].split("=")[1], fc);*/

fc.exportFrequencies(); //creates frequencies.txt - sorted alphabetically
fc.exportFrequenciesByValue(); //creates frequenciesByValue.txt - sorted by frequencies (descending order)
/*fc.exportFrequencies(); //creates frequencies.txt - sorted alphabetically
fc.exportFrequenciesByValue(); //creates frequenciesByValue.txt - sorted by frequencies (descending order)*/

if(args.length >= 5) {
analyticsExtractor = new AnalyticsExtractor(args[4].split("=")[1]);
analyticsExtractor.analyze();
}*/
}

Analysis analysis = new Analysis(args[4].split("=")[1]);
analysis.SentimentAnalysis();
Expand Down
4 changes: 2 additions & 2 deletions src/twitter/manager/TweetManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ private static String getURLResponse(String username, String since, String until
* @return A list of all tweets found
*/
public static List<Tweet> getTweets(TwitterCriteria criteria) {
List<Tweet> results = new ArrayList<Tweet>();
List<Tweet> results = new ArrayList<>();

try {
String refreshCursor = null;
Expand All @@ -89,7 +89,7 @@ public static List<Tweet> getTweets(TwitterCriteria criteria) {
}

for (Element tweet : tweets) {
String usernameTweet = tweet.select("span.username.js-action-profile-name b").text();
String usernameTweet = tweet.select("span.username").text();
String txt = tweet.select("p.js-tweet-text").text().replaceAll("[^\\u0000-\\uFFFF]", "");
int retweets = Integer.valueOf(tweet.select("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replaceAll(",", ""));
int favorites = Integer.valueOf(tweet.select("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replaceAll(",", ""));
Expand Down
40 changes: 29 additions & 11 deletions src/youtube/YTCommentsCollector.java
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,15 @@ public ArrayList<JsonObject> collectComments(String filenameArgument) {
CommentSnippet snippet = videoComment.getSnippet().getTopLevelComment().getSnippet();
String comment = snippet.getTextDisplay();
String authorID = snippet.getAuthorChannelId().toString();
String authorName = snippet.getAuthorDisplayName();

JsonObject jsonObject = createJSONfor(comment,authorID,authorName);
jsons.add(jsonObject);


// DEPRECATED
/*String userURL = snippet.getAuthorGoogleplusProfileUrl();
String userURL = snippet.getAuthorGoogleplusProfileUrl();
if(userURL!=null){ //if the user still exists
String[] splitted = userURL.split("/");
Expand All @@ -82,8 +89,7 @@ public ArrayList<JsonObject> collectComments(String filenameArgument) {
JsonObject jsonObject = createJSONfor(comment,authorID,userDetails[0],userDetails[1],userDetails[2]);
jsons.add(jsonObject);

}
}*/



Expand Down Expand Up @@ -132,8 +138,6 @@ private String readAPIKey(){
String key = bufferedReader.readLine(); //key is in the string in the first line of the API_KEY.txt file
bufferedReader.close();
return key;
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
Expand All @@ -148,7 +152,7 @@ private String readAPIKey(){
*/
private String[] getDetailsFromJSON(HttpEntity entity){
String[] details = new String[3];
String retSrc = null;
String retSrc;

if(entity!=null) { //null => user does not exist

Expand Down Expand Up @@ -183,9 +187,7 @@ private String[] getDetailsFromJSON(HttpEntity entity){
details[2] = "-";
}

} catch (IOException e) {
e.printStackTrace();
} catch (JSONException e) {
} catch (IOException | JSONException e) {
e.printStackTrace();
}

Expand Down Expand Up @@ -214,8 +216,6 @@ private ArrayList<String> fetchVideoIds(String filenameArgument){
while ((line = br.readLine()) != null) {
videoIDs.add(line); //adds the videoID in the list
}
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
Expand All @@ -237,4 +237,22 @@ private JsonObject createJSONfor(String comment,String authorID,String gender,St
return jsonObject;
}

/**
 * Creates the JSON representation of a comment using only the attributes
 * that are still available (no deprecated profile details).
 * The resulting object carries the keys "comment", "authorID" and "authorName".
 * @param comment The comment's text
 * @param authorID The author's ID
 * @param name The author's YouTube username
 * @return JSON object containing the above information
 */
private JsonObject createJSONfor(String comment,String authorID,String name){
    // Build and return in a single expression; keys are added in the same order as before.
    return factory.createObjectBuilder()
            .add("comment", comment)
            .add("authorID", authorID)
            .add("authorName", name)
            .build();
}

}
2 changes: 1 addition & 1 deletion src/youtube/YoutubeExporter.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public static void main(String args[]){
for(JsonObject json : jsons){
String comment = json.getString("comment"); //gets the comment field
//String preprocessed = Preprocessor.preprocessComment(comment);
//PREPROCESSED TEXT SHOULD BE SAVEN INSIDE MONGOCONNECTOR (SOFIA)
//PREPROCESSED TEXT SHOULD BE SAVED INSIDE MONGOCONNECTOR (SOFIA)
}

for (String parameter : args) {
Expand Down

0 comments on commit dd04b1b

Please sign in to comment.