Skip to content

Commit

Permalink
Adds sentiment evaluation methods
Browse files Browse the repository at this point in the history
  • Loading branch information
syfantid committed Jun 15, 2017
1 parent 9172c67 commit e4385f0
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 7 deletions.
7 changes: 2 additions & 5 deletions src/combiner/Combiner.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,14 @@ public static void main(String[] args) throws JSONException, IOException {

if(args != null) {
// Collect Youtube comments and insert them to DB
/* YoutubeExporter.main(args);
Preprocessor.preprocessComments(args[4].split("=")[1], fc); */
YoutubeExporter.main(args);
Preprocessor.preprocessComments(args[4].split("=")[1], fc);


// Collect tweets and insert them to DB
TwitterExporter.main(args);
Preprocessor.preprocessTweets(args[4].split("=")[1], fc, args[5].split("=")[1]);

/*fc.exportFrequencies(); //creates frequencies.txt - sorted alphabetically
fc.exportFrequenciesByValue(); //creates frequenciesByValue.txt - sorted by frequencies (descending order)*/

if(args.length >= 5) {
analyticsExtractor = new AnalyticsExtractor(args[4].split("=")[1]);
analyticsExtractor.analyze();
Expand Down
2 changes: 1 addition & 1 deletion src/combiner/Preprocessor.java
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ private static String getSubstances(String parsedTweet, String tweet) {
* @param input The tweet's text before the processing
* @return The parsed tweet text
*/
private static String preprocessTweet(String input){
public static String preprocessTweet(String input){
input = prepareText(input);
String[] tokens = tokenizer(input);

Expand Down
123 changes: 123 additions & 0 deletions src/evaluation/Evaluator.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
package evaluation;

import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;

import combiner.Preprocessor;
import javafx.util.Pair;
import sentiment.*;

import java.util.List;
import java.util.Scanner;

/**
* Created by sifantid on 15/6/2017.
*/
/**
 * Evaluates the project's local sentiment analysis against an annotated tweet dataset.
 *
 * <p>On class load the annotated dataset is read from {@link #ANNOTATED_DATA_PATH} and a
 * collection-independent {@link Analysis} instance is created. Running {@link #main(String[])}
 * prints how many tweets were labelled with the annotated emotion and the resulting ratio.
 */
public class Evaluator {

    /** Location of the tab-separated annotated dataset, relative to the working directory. */
    private static final String ANNOTATED_DATA_PATH = "./resources/annotatedData.txt";

    /**
     * Maps tweet text to its annotated emotion. Because the tweet text is the key, a tweet that
     * appears more than once in the dataset keeps only the emotion of its last occurrence.
     */
    private static final HashMap<String, String> annotatedData;

    /** Local sentiment analyzer; {@code null} when its construction failed with an I/O error. */
    private static final Analysis analyzer;

    /*
     * Keep only relevant data from the annotated set (6 basic emotions and neutral sentiment)
     * and set up the analyzer. Failures are reported but never thrown from here, so that the
     * class can still be loaded and the problem is surfaced by analyzeAnnotated() instead of
     * an ExceptionInInitializerError.
     */
    static {
        annotatedData = loadAnnotatedData(ANNOTATED_DATA_PATH);

        Analysis created = null;
        try {
            created = new Analysis();
        } catch (IOException e) {
            e.printStackTrace();
        }
        analyzer = created;
    }


    /**
     * Runs the evaluation and prints the ratio of correctly labelled tweets.
     * @param args Ignored
     */
    public static void main(String[] args) {
        try {
            System.out.println("Percentage: " + analyzeAnnotated());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }


    /**
     * Reads the annotated dataset and keeps the lines labelled with one of the 6 basic emotions
     * or with the neutral sentiment. Each line is split on tabs; the last field is taken as the
     * emotion and the second to last field as the tweet text.
     * @param path Path of the annotated dataset file
     * @return Map from tweet text to annotated emotion; empty if the file cannot be found
     */
    private static HashMap<String, String> loadAnnotatedData(String path) {
        HashMap<String, String> data = new HashMap<>();
        // try-with-resources: closing the Scanner also closes the underlying FileReader
        try (Scanner input = new Scanner(new FileReader(path))) {
            // hasNextLine() is the check that matches nextLine(); hasNext() looks for tokens
            while (input.hasNextLine()) {
                String[] fields = input.nextLine().split("\\t");
                if (fields.length < 2) {
                    // Blank or malformed line: there is no (tweet, emotion) pair to read
                    continue;
                }
                String emotion = fields[fields.length - 1];
                String tweet = fields[fields.length - 2];
                if (isRelevantEmotion(emotion)) {
                    data.put(tweet, emotion);
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }
        return data;
    }


    /**
     * Checks whether an annotated label is one of the labels used for the evaluation.
     * @param emotion The label read from the dataset
     * @return true for the 6 basic emotions and for neutral, false otherwise
     */
    private static boolean isRelevantEmotion(String emotion) {
        switch (emotion) {
            case "joy":
            case "surprise":
            case "fear":
            case "anger":
            case "neutral":
            case "sadness":
            case "disgust":
                return true;
            default:
                return false;
        }
    }


    /**
     * Analyzes the annotated dataset
     * @return The ratio (between 0 and 1) of tweets whose sentiment, as computed by the local
     * algorithm, matches the annotation
     * @throws IOException If the analyzer could not be initialized or the sentiment calculation fails
     * @throws IllegalArgumentException If no annotated data was loaded
     */
    private static double analyzeAnnotated() throws IOException {
        if (analyzer == null) {
            // Report the real cause instead of failing later with a NullPointerException
            throw new IOException("Sentiment analyzer could not be initialized");
        }

        int correct = 0; // The number of tweets with correct sentiment
        for (String tweet : annotatedData.keySet()) { // For each tweet check if emotion is right
            String parsed = Preprocessor.preprocessTweet(tweet); // Preprocess tweet
            List<Pair<String, Double>> localSentiment = analyzer.sentiment(parsed); // Calculate sentiment
            String maxSentiment = calculateMaxSentiment(localSentiment); // Main sentiment per local algorithm
            // Case-insensitive and locale-independent comparison to the (lower case) annotation
            if (maxSentiment.equalsIgnoreCase(annotatedData.get(tweet))) {
                correct++;
            }
        }
        System.out.println("CORRECT: " + correct);
        return calculatePercentage(correct, annotatedData.size()); // Final ratio of correct sentiment
    }


    /**
     * Calculates the main tweet sentiment given a list of sentiment scores
     * @param localSentiment List of sentiment and score pairs
     * @return The sentiment with the highest strictly positive score, or "neutral" when no score
     * is greater than zero
     */
    private static String calculateMaxSentiment(List<Pair<String, Double>> localSentiment) {
        double max = 0.0;
        String mainEmotion = "neutral";
        for (Pair<String, Double> emotion : localSentiment) {
            if (emotion.getValue() > max) {
                mainEmotion = emotion.getKey();
                max = emotion.getValue();
            }
        }
        return mainEmotion;
    }

    /**
     * Calculates the ratio of two counts
     * @param noItems The numerator
     * @param outOf The denominator
     * @return The ratio noItems / outOf as a floating point number (not multiplied by 100)
     * @throws IllegalArgumentException If the denominator is zero
     */
    private static double calculatePercentage(int noItems, int outOf) {
        if (outOf == 0) {
            throw new IllegalArgumentException("Division by zero");
        }
        return (double) noItems / outOf;
    }
}
5 changes: 5 additions & 0 deletions src/evaluation/package-info.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
/**
* Created by sifantid on 15/6/2017.
* Package to evaluate the sentiment analysis method of this project
*/
package evaluation;
17 changes: 16 additions & 1 deletion src/sentiment/Analysis.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,21 @@ public Analysis(String collectionName) throws IOException {
representativeWords = sentiment.getRepresentativeWords();
}

/**
 * Constructor that takes no collection name; initializes the emotion list,
 * the SenticNet library and the representative words.
 * @throws IOException Propagated from the initialization calls made here
 */
public Analysis() throws IOException {
emotions = Emotions.values();

// SenticNet is loaded from the fixed RDF/XML file below
senticNetLib = new SenticNet("./data/senticnet3.rdf.xml");

// Only the representative words are kept from the SentimentAnalysis helper
representativeWords = new SentimentAnalysis().getRepresentativeWords();
}

/**
* Sentiment analysis for the whole case
* @throws JSONException In case a field cannot be found
Expand Down Expand Up @@ -191,7 +206,7 @@ private void analyze(String dbType) throws IOException {
* @return List of pairs (Emotion,Score) for the given tweet
* @throws IOException
*/
private List<Pair<String, Double>> sentiment(String tweet) throws IOException {
public List<Pair<String, Double>> sentiment(String tweet) throws IOException {
tweet = tweet.concat(" ");
List<Pair<String, Double>> scores = new ArrayList<>();

Expand Down

0 comments on commit e4385f0

Please sign in to comment.