Skip to content

Commit

Permalink
Adds sentiment analysis for whole case studies
Browse files Browse the repository at this point in the history
  • Loading branch information
syfantid committed Sep 19, 2016
1 parent fd55ff9 commit 6970ece
Show file tree
Hide file tree
Showing 6 changed files with 222 additions and 20 deletions.
2 changes: 1 addition & 1 deletion src/analytics/AnalyticsExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
* Class to produce the analytics
* Created by sifantid on 5/5/2016.
*/
public class AnalyticsExtractor {
public class AnalyticsExtractor {
private static MongoConnector mc;

/**
Expand Down
4 changes: 3 additions & 1 deletion src/combiner/Combiner.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public static void main(String[] args) throws JSONException, IOException {


if(args != null) {
/* // Collect Youtube comments and insert them to DB
/*// Collect Youtube comments and insert them to DB
YoutubeExporter.main(args);
Preprocessor.preprocessComments(args[4].split("=")[1], fc);
Expand All @@ -49,6 +49,8 @@ public static void main(String[] args) throws JSONException, IOException {
Analysis analysis = new Analysis(args[4].split("=")[1]);
//analysis.analyze("twitter");
//analysis.analyze("youtube");
analysis.analyzeCase();

}

}
Expand Down
27 changes: 27 additions & 0 deletions src/mongo/MongoConnector.java
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,11 @@ public HashMap<ObjectId,JSONObject> getTweets() {
return getDocuments(tweetsCollection,_TWEET_JSON_);
}

/**
 * Gets all tweets from the twitter collection as complete documents
 * @return Pairs of tweets' IDs and the JSON built from each whole document
 */
public HashMap<ObjectId,JSONObject> getFullTweets() {
    // The single-argument getDocuments overload converts each whole document.
    return getDocuments(_db.getCollection(_coll_name_twitter));
}

/**
* Gets all parsed tweets from twitter database
* @return Pairs of tweets' IDs and tweets' parsed text
Expand Down Expand Up @@ -221,6 +226,15 @@ public HashMap<ObjectId,JSONObject> getComments() {
return getDocuments(commentsCollection,_COMMENT_JSON_);
}

/**
 * Gets all youtube comments from the youtube collection as complete documents
 * @return Pairs of comments' IDs and the JSON built from each whole document
 */
public HashMap<ObjectId,JSONObject> getFullComments() {
    // The single-argument getDocuments overload converts each whole document.
    return getDocuments(_db_youtube.getCollection(_coll_name_youtube));
}

/**
* Gets all youtube comments' parsed text
* @return Pairs of comments' IDs and comments' parsed texts
Expand Down Expand Up @@ -269,6 +283,19 @@ private HashMap<ObjectId,JSONObject> getDocuments(MongoCollection<Document> col,
return docs;
}

/**
 * Reads every document of a collection and converts each one, as a whole, to JSON
 * @param col The collection to read
 * @return Pairs of documents' IDs (the _COLL_INDEX_ field) and documents' JSONs
 */
private HashMap<ObjectId,JSONObject> getDocuments(MongoCollection<Document> col) {
    final HashMap<ObjectId,JSONObject> result = new HashMap<>();

    // An unfiltered find() iterates over the complete collection.
    for (Document document : col.find()) {
        result.put(document.getObjectId(_COLL_INDEX_), new JSONObject(document));
    }
    return result;
}

/**
* Centralised management for error displaying
* @param str the error message
Expand Down
112 changes: 96 additions & 16 deletions src/sentiment/Analysis.java
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@
package sentiment;

import analytics.AnalyticsExtractor;
import mongo.MongoConnector;
import org.bson.types.ObjectId;
import javafx.util.Pair;
import org.json.JSONException;
import org.json.JSONObject;

import java.io.*;
import java.util.Map.Entry;

import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
* Custom class that performs the pre-processing and sentiment analysis on tweets
Expand All @@ -18,9 +24,11 @@ public class Analysis {
private HashMap<Emotions, ArrayList<String>> representativeWords;
private SenticNet senticNetLib;
private MongoConnector mongoConnector;
private String collectionName;

/**
* Constructor of the class; Initializes all variables
* @param collectionName The name of the collection at hand
* @throws IOException
*/
public Analysis(String collectionName) throws IOException {
Expand All @@ -29,15 +37,102 @@ public Analysis(String collectionName) throws IOException {
String senticNetFilename = "./data/senticnet3.rdf.xml";
senticNetLib = new SenticNet(senticNetFilename);

this.collectionName = collectionName;

mongoConnector = new MongoConnector("localhost", 27017, collectionName);

SentimentAnalysis sentiment = new SentimentAnalysis();

representativeWords = sentiment.getRepresentativeWords();
}

/**
 * Sentiment analysis of a case's tweets.
 * Groups all tweets by year and month, accumulates the six emotion scores of every
 * tweet into its month, averages them and writes one result file per month.
 * @throws JSONException In case a field ("tweet", "date", "emScores" or an emotion score) cannot be found
 */
public void analyzeCase() throws JSONException {
    // One accumulator per month, keyed by "<year>_<month>"
    HashMap<String, Month> months = new HashMap<>();

    for (JSONObject tweet : mongoConnector.getFullTweets().values()) { // For each tweet
        // Read the date once and derive both parts from it
        final String date = tweet.getJSONObject("tweet").getString("date");
        final String month = getMonth(date);
        final String year = getYear(date);

        // Create the month only when its first tweet shows up (no throw-away allocation per tweet)
        Month monthObject = months.computeIfAbsent(year + "_" + month, k -> new Month(month, year));

        // Increment feelings; look the score object up once instead of once per emotion
        JSONObject scores = tweet.getJSONObject("emScores");
        monthObject.addFeelingCount(scores.getDouble("ANGER"),
                scores.getDouble("DISGUST"),
                scores.getDouble("FEAR"),
                scores.getDouble("JOY"),
                scores.getDouble("SADNESS"),
                scores.getDouble("SURPRISE"));
    }

    for (Month monthObject : months.values()) { // For each month
        monthObject.finalizeFeelings(); // Find the feelings for the whole month
        writeFeelingsToFile(monthObject);
    }
}

/**
 * Writes sentiment scores for a specific month to file.
 * The file is out/&lt;collectionName&gt;/&lt;year&gt;_&lt;month&gt;.txt (platform separator); the
 * directory is created when it does not exist yet. Failures are reported on
 * standard error and do not abort the analysis.
 * @param monthObject All the month related information to be written to file
 */
private void writeFeelingsToFile(Month monthObject) {
    // File.separator instead of a hard-coded backslash keeps the layout correct outside Windows
    File directory = new File("out" + File.separator + collectionName);
    if (!directory.isDirectory() && !directory.mkdirs()) {
        System.err.println("Could not create output directory " + directory.getPath());
        return;
    }

    File target = new File(directory, monthObject.getYear() + "_" + monthObject.getMonth() + ".txt");
    String eol = System.lineSeparator();
    try (Writer writer = new BufferedWriter(new OutputStreamWriter(
            new FileOutputStream(target), "utf-8"))) {
        writer.write("ANGER" + " , " + monthObject.getAngerCount() + eol);
        writer.write("DISGUST" + " , " + monthObject.getDisgustCount() + eol);
        writer.write("FEAR" + " , " + monthObject.getFearCount() + eol);
        writer.write("JOY" + " , " + monthObject.getJoyCount() + eol);
        writer.write("SADNESS" + " , " + monthObject.getSadnessCount() + eol);
        writer.write("SURPRISE" + " , " + monthObject.getSurpriseCount() + eol);
        writer.write("Total month tweets" + " : " + monthObject.getCount() + eol);
    } catch (IOException e) {
        // Best effort: name the file that failed, keep going with the remaining months
        System.err.println("Could not write sentiment scores to " + target.getPath());
        e.printStackTrace();
    }
}

/** Two digits enclosed by slashes; compiled once because it is used for every tweet. */
private static final Pattern MONTH_PATTERN = Pattern.compile("\\/(\\d{2})\\/");

/**
 * Get the month out of a date
 * @param date The date string; the month is taken from the first two-digit group
 *             that stands between two slashes
 * @return The month string, or an empty string if the date is null or has no such group
 */
private String getMonth(String date) {
    if (date == null) {
        return "";
    }
    Matcher m = MONTH_PATTERN.matcher(date);
    return m.find() ? m.group(1) : "";
}

/** Four digits directly followed by a slash; compiled once because it is used for every tweet. */
private static final Pattern YEAR_PATTERN = Pattern.compile("(\\d{4})\\/");

/**
 * Get the year out of a date
 * @param date The date string; the year is taken from the first four-digit group
 *             that is directly followed by a slash
 * @return The year string, or an empty string if the date is null or has no such group
 */
private String getYear(String date) {
    if (date == null) {
        return "";
    }
    Matcher m = YEAR_PATTERN.matcher(date);
    return m.find() ? m.group(1) : "";
}

/**
* Inserts the processed tweets and the emotions in the DB
* @param dbType The type of the database; can be either twitter or youtube
* @throws IOException
*/
public void analyze(String dbType) throws IOException {
Expand Down Expand Up @@ -69,22 +164,7 @@ public void analyze(String dbType) throws IOException {
* @throws IOException
*/
private List<Pair<String, Double>> sentiment(String tweet) throws IOException {
//System.out.println("----------------------------------------------------");
tweet = tweet.concat(" ");
//System.out.println("Tweet: " + tweet);
// If you want to apply stemming techniques to the tweet remove the comment characters
/*String[] words = tweet.split(" ");
String[] stemmedWords = new String[words.length];
for(int i = 0; i < words.length; i++) {
stemmedWords[i] = stemmer.stemm(words[i]) + " ";
}
StringBuilder strBuilder = new StringBuilder();
for (int i = 0; i < stemmedWords.length; i++) {
strBuilder.append(stemmedWords[i]);
}
tweet = strBuilder.toString();
System.out.println("Stemmed Tweet: " + tweet);*/
List<Pair<String, Double>> scores = new ArrayList<>();

int[] myCounter = new int[6];
Expand Down
93 changes: 93 additions & 0 deletions src/sentiment/Month.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package sentiment;


/**
 * Class to represent all the month related information for sentiment analysis.
 * <p>
 * Emotion scores are summed up with {@link #addFeelingCount}; a call to
 * {@link #finalizeFeelings()} then turns the sums into per-tweet averages.
 * The getters therefore return running sums before that call and averages after it.
 * Scores should not be added once the month has been finalized.
 * Created by sifantid on 19/9/2016.
 */
public class Month {
    private final String month;
    private final String year;

    // Primitive accumulators: no boxing on every addition
    private double anger;
    private double disgust;
    private double fear;
    private double joy;
    private double sadness;
    private double surprise;

    /** Number of tweets added so far. */
    private int count;
    /** True once the sums have been converted to averages. */
    private boolean finalized;

    /**
     * Creates an empty month; all scores and the tweet count start at zero.
     * @param month The month string
     * @param year The year string
     */
    public Month(String month, String year) {
        this.month = month;
        this.year = year;
    }

    /**
     * Adds the emotion scores of one tweet and increments the tweet count.
     * @param angerCount The tweet's anger score
     * @param disgustCount The tweet's disgust score
     * @param fearCount The tweet's fear score
     * @param joyCount The tweet's joy score
     * @param sadnessCount The tweet's sadness score
     * @param surpriseCount The tweet's surprise score
     */
    void addFeelingCount(Double angerCount, Double disgustCount, Double fearCount, Double joyCount,
                         Double sadnessCount, Double surpriseCount) {
        anger += angerCount;
        disgust += disgustCount;
        fear += fearCount;
        joy += joyCount;
        sadness += sadnessCount;
        surprise += surpriseCount;
        count += 1;
    }

    /**
     * Converts the summed scores to averages over the month's tweets.
     * Does nothing for a month without tweets (avoids NaN from 0/0) and
     * nothing on repeated calls (avoids dividing twice).
     */
    void finalizeFeelings() {
        if (count == 0 || finalized) {
            return;
        }
        anger /= count;
        disgust /= count;
        fear /= count;
        joy /= count;
        sadness /= count;
        surprise /= count;
        finalized = true;
    }

    /** @return The month string */
    public String getMonth() {
        return month;
    }

    /** @return The year string */
    String getYear() {
        return year;
    }

    /** @return The anger score: sum before finalizeFeelings(), average after */
    Double getAngerCount() {
        return anger;
    }

    /** @return The disgust score: sum before finalizeFeelings(), average after */
    Double getDisgustCount() {
        return disgust;
    }

    /** @return The fear score: sum before finalizeFeelings(), average after */
    Double getFearCount() {
        return fear;
    }

    /** @return The joy score: sum before finalizeFeelings(), average after */
    Double getJoyCount() {
        return joy;
    }

    /** @return The sadness score: sum before finalizeFeelings(), average after */
    Double getSadnessCount() {
        return sadness;
    }

    /** @return The surprise score: sum before finalizeFeelings(), average after */
    Double getSurpriseCount() {
        return surprise;
    }

    /** @return The number of tweets added to this month */
    int getCount() {
        return count;
    }
}
4 changes: 2 additions & 2 deletions src/sentiment/SentimentAnalysis.java
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,11 @@ HashMap<Emotions,ArrayList<String>> getRepresentativeWords() {
representativesTemp.addAll(basicEmotions.get(e));
representativesTemp = eliminateDuplicates(representativesTemp);
//representativesTemp.sort(String.CASE_INSENSITIVE_ORDER);
System.out.println("Emotion: " + e);
/*System.out.println("Emotion: " + e);
for(String word:representativesTemp) {
System.out.println("Word: " + word);
}
System.out.println("-------------------------------------------------------------------------------------------");
System.out.println("-------------------------------------------------------------------------------------------");*/
representativeWords.put(e,representativesTemp);
}

Expand Down

0 comments on commit 6970ece

Please sign in to comment.