From 6970ece4a4f858df651d51e9e84ba289a6fdc9df Mon Sep 17 00:00:00 2001 From: Sofia Yfantidou Date: Mon, 19 Sep 2016 15:09:42 +0300 Subject: [PATCH] Adds sentiment analysis for whole case studies --- src/analytics/AnalyticsExtractor.java | 2 +- src/combiner/Combiner.java | 4 +- src/mongo/MongoConnector.java | 27 +++++++ src/sentiment/Analysis.java | 112 ++++++++++++++++++++++---- src/sentiment/Month.java | 93 +++++++++++++++++++++ src/sentiment/SentimentAnalysis.java | 4 +- 6 files changed, 222 insertions(+), 20 deletions(-) create mode 100644 src/sentiment/Month.java diff --git a/src/analytics/AnalyticsExtractor.java b/src/analytics/AnalyticsExtractor.java index fac5566..90c2ca3 100644 --- a/src/analytics/AnalyticsExtractor.java +++ b/src/analytics/AnalyticsExtractor.java @@ -13,7 +13,7 @@ * Class to produce the analytics * Created by sifantid on 5/5/2016. */ -public class AnalyticsExtractor { +public class AnalyticsExtractor { private static MongoConnector mc; /** diff --git a/src/combiner/Combiner.java b/src/combiner/Combiner.java index 46c4e89..3a98c3c 100644 --- a/src/combiner/Combiner.java +++ b/src/combiner/Combiner.java @@ -29,7 +29,7 @@ public static void main(String[] args) throws JSONException, IOException { if(args != null) { -/* // Collect Youtube comments and insert them to DB + /*// Collect Youtube comments and insert them to DB YoutubeExporter.main(args); Preprocessor.preprocessComments(args[4].split("=")[1], fc); @@ -49,6 +49,8 @@ public static void main(String[] args) throws JSONException, IOException { Analysis analysis = new Analysis(args[4].split("=")[1]); //analysis.analyze("twitter"); //analysis.analyze("youtube"); + analysis.analyzeCase(); + } } diff --git a/src/mongo/MongoConnector.java b/src/mongo/MongoConnector.java index 498256f..b328e91 100644 --- a/src/mongo/MongoConnector.java +++ b/src/mongo/MongoConnector.java @@ -150,6 +150,11 @@ public HashMap getTweets() { return getDocuments(tweetsCollection,_TWEET_JSON_); } + public HashMap getFullTweets() { + MongoCollection tweetsCollection = _db.getCollection(_coll_name_twitter); + return getDocuments(tweetsCollection); + } + /** * Gets all parsed tweets from twitter database * @return Pairs of tweets' IDs and tweets' parsed text @@ -221,6 +226,15 @@ public HashMap getComments() { return getDocuments(commentsCollection,_COMMENT_JSON_); } + /** + * Gets all youtube comments from youtube database + * @return Pairs of comments' IDs and comments' JSONs + */ + public HashMap getFullComments() { + MongoCollection commentsCollection = _db_youtube.getCollection(_coll_name_youtube); + return getDocuments(commentsCollection); + } + /** * Gets all youtube comments' parsed text * @return Pairs of comments' IDs and comments' parsed texts @@ -269,6 +283,19 @@ private HashMap getDocuments(MongoCollection col, return docs; } + private HashMap getDocuments(MongoCollection col) { + FindIterable iterable = col.find(); + + HashMap docs = new HashMap<>(); + + for (Document doc: iterable) { + ObjectId id = doc.getObjectId(_COLL_INDEX_); + JSONObject json = new JSONObject(doc); + docs.put(id,json); + } + return docs; + } + /** * Centralised management for error displaying * @param str the error message diff --git a/src/sentiment/Analysis.java b/src/sentiment/Analysis.java index 41e0012..895ff67 100644 --- a/src/sentiment/Analysis.java +++ b/src/sentiment/Analysis.java @@ -1,12 +1,18 @@ package sentiment; +import analytics.AnalyticsExtractor; import mongo.MongoConnector; import org.bson.types.ObjectId; import javafx.util.Pair; +import org.json.JSONException; +import org.json.JSONObject; + +import java.io.*; import java.util.Map.Entry; -import java.io.IOException; import java.util.*; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Custom class the performs the pre-processing and sentiment analysis on tweets @@ -18,9 +24,11 @@ public class Analysis { private HashMap> representativeWords; private SenticNet senticNetLib; private MongoConnector mongoConnector; + private String collectionName; /** * Constructor of the class; Initializes all variables + * @param collectionName The name of the collection at hand * @throws IOException */ public Analysis(String collectionName) throws IOException { @@ -29,6 +37,8 @@ public Analysis(String collectionName) throws IOException { String senticNetFilename = "./data/senticnet3.rdf.xml"; senticNetLib = new SenticNet(senticNetFilename); + this.collectionName = collectionName; + mongoConnector = new MongoConnector("localhost", 27017, collectionName); SentimentAnalysis sentiment = new SentimentAnalysis(); @@ -36,8 +46,93 @@ public Analysis(String collectionName) throws IOException { representativeWords = sentiment.getRepresentativeWords(); } + /** + * Sentiment analysis of a case's tweets + * @throws JSONException In case a field cannot be found + */ + public void analyzeCase() throws JSONException { + HashMap tweets = mongoConnector.getFullTweets(); // Get all tweets + HashMap months = new HashMap<>(); // To save emotions per month + String month; + String year; + String key; // map key + for (JSONObject tweet : tweets.values()) { // For each tweet + month = getMonth(tweet.getJSONObject("tweet").getString("date")); // Find month + year = getYear(tweet.getJSONObject("tweet").getString("date")); // Find year + key = year + "_" + month; + + months.putIfAbsent(key, new Month(month,year)); // If it's the first tweet of the month, add month + // Increment feelings + months.get(key).addFeelingCount(tweet.getJSONObject("emScores").getDouble("ANGER"), + tweet.getJSONObject("emScores").getDouble("DISGUST"), + tweet.getJSONObject("emScores").getDouble("FEAR"), + tweet.getJSONObject("emScores").getDouble("JOY"), + tweet.getJSONObject("emScores").getDouble("SADNESS"), + tweet.getJSONObject("emScores").getDouble("SURPRISE")); + } + + for (Month monthObject : months.values()) { // For each month + monthObject.finalizeFeelings(); // Find the feelings for the whole month + writeFeelingsToFile(monthObject); + } + } + + /** + * Writes sentiment scores for a specific month to file + * @param monthObject All the month related information to be written to file + */ + private void writeFeelingsToFile(Month monthObject) { + String path = "out\\" + collectionName + "\\" + monthObject.getYear() + "_" + monthObject.getMonth() + ".txt"; + try (Writer writer = new BufferedWriter(new OutputStreamWriter( + new FileOutputStream(path), "utf-8"))) { + writer.write("ANGER" + " , " + monthObject.getAngerCount()); + writer.write(System.lineSeparator()); + writer.write("DISGUST" + " , " + monthObject.getDisgustCount()); + writer.write(System.lineSeparator()); + writer.write("FEAR" + " , " + monthObject.getFearCount()); + writer.write(System.lineSeparator()); + writer.write("JOY" + " , " + monthObject.getJoyCount()); + writer.write(System.lineSeparator()); + writer.write("SADNESS" + " , " + monthObject.getSadnessCount()); + writer.write(System.lineSeparator()); + writer.write("SURPRISE" + " , " + monthObject.getSurpriseCount()); + writer.write(System.lineSeparator()); + writer.write("Total month tweets" + " : " + monthObject.getCount()); + writer.write(System.lineSeparator()); + } catch (IOException e) { + e.printStackTrace(); + } + } + + /** + * Get the month out of a date + * @param date The date string + * @return The month string + */ + private String getMonth(String date) { + Matcher m = Pattern.compile("\\/(\\d{2})\\/").matcher(date); + if(m.find()) { + return m.group(1); + } + return ""; + } + + /** + * Get the year out of a date + * @param date The date string + * @return The year string + */ + private String getYear(String date) { + Matcher m = Pattern.compile("(\\d{4})\\/").matcher(date); + if(m.find()) { + return m.group(1); + } + return ""; + } + /** * Inserts the processed tweets and the emotions in the DB + * @param dbType The type of the database; can be either twitter or youtube * @throws IOException */ public void analyze(String dbType) throws IOException { @@ -69,22 +164,7 @@ public void analyze(String dbType) throws IOException { * @throws IOException */ private List> sentiment(String tweet) throws IOException { - //System.out.println("----------------------------------------------------"); tweet = tweet.concat(" "); - //System.out.println("Tweet: " + tweet); - // If you want to apply stemming techniques to the tweet remove the comment characters - /*String[] words = tweet.split(" "); - String[] stemmedWords = new String[words.length]; - for(int i = 0; i < words.length; i++) { - stemmedWords[i] = stemmer.stemm(words[i]) + " "; - } - StringBuilder strBuilder = new StringBuilder(); - for (int i = 0; i < stemmedWords.length; i++) { - strBuilder.append(stemmedWords[i]); - } - tweet = strBuilder.toString(); - - System.out.println("Stemmed Tweet: " + tweet);*/ List> scores = new ArrayList<>(); int[] myCounter = new int[6]; diff --git a/src/sentiment/Month.java b/src/sentiment/Month.java new file mode 100644 index 0000000..5e70762 --- /dev/null +++ b/src/sentiment/Month.java @@ -0,0 +1,93 @@ +package sentiment; + + +/** + * Class to represent all the month related information for sentiment analysis + * Created by sifantid on 19/9/2016. + */ +public class Month { + private String month; + private String year; + + private Double angerCount; + private Double disgustCount; + private Double fearCount; + private Double joyCount; + private Double sadnessCount; + private Double surpriseCount; + + private int count; + + public Month(String month, String year) { + this.month = month; + this.year = year; + this.angerCount = 0.0; + this.disgustCount = 0.0; + this.fearCount = 0.0; + this.joyCount = 0.0; + this.sadnessCount = 0.0; + this.surpriseCount = 0.0; + this.count = 0; + } + + void addFeelingCount(Double angerCount, Double disgustCount, Double fearCount, Double joyCount, + Double sadnessCount, Double surpriseCount) { + this.angerCount = this.angerCount + angerCount; + this.disgustCount = this.disgustCount + disgustCount; + this.fearCount = this.fearCount + fearCount; + this.joyCount = this.joyCount + joyCount; + this.sadnessCount = this.sadnessCount + sadnessCount; + this.surpriseCount = this.surpriseCount + surpriseCount; + incrementCount(); + } + + void finalizeFeelings() { + angerCount /= count; + disgustCount /= count; + fearCount /= count; + joyCount /= count; + sadnessCount /= count; + surpriseCount /= count; + } + + private void incrementCount() { + count += 1; + } + + public String getMonth() { + + return month; + } + + String getYear() { + return year; + } + + Double getAngerCount() { + return angerCount; + } + + Double getDisgustCount() { + return disgustCount; + } + + Double getFearCount() { + return fearCount; + } + + Double getJoyCount() { + return joyCount; + } + + Double getSadnessCount() { + return sadnessCount; + } + + Double getSurpriseCount() { + return surpriseCount; + } + + int getCount() { + return count; + } +} diff --git a/src/sentiment/SentimentAnalysis.java b/src/sentiment/SentimentAnalysis.java index 41fdf0c..dc10956 100644 --- a/src/sentiment/SentimentAnalysis.java +++ b/src/sentiment/SentimentAnalysis.java @@ -159,11 +159,11 @@ HashMap> getRepresentativeWords() { representativesTemp.addAll(basicEmotions.get(e)); representativesTemp = eliminateDuplicates(representativesTemp); //representativesTemp.sort(String.CASE_INSENSITIVE_ORDER); - System.out.println("Emotion: " + e); + /*System.out.println("Emotion: " + e); for(String word:representativesTemp) { System.out.println("Word: " + word); } - System.out.println("-------------------------------------------------------------------------------------------"); + System.out.println("-------------------------------------------------------------------------------------------");*/ representativeWords.put(e,representativesTemp); }