-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
151 additions
and
2 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
24 changes: 24 additions & 0 deletions
24
Business News Analysis/stocknews/stocknews/spiders/urlGenerator.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import scrapy | ||
import pandas as pd | ||
import re | ||
|
||
from stocknews.items import StocknewsItem | ||
|
||
class urlGenerator(scrapy.Spider):
    """Spider that crawls livemint.com article URLs loaded from a CSV dump.

    The start URLs are taken from the ``href`` column of a previously
    scraped CSV file (``livemint_data_3.csv``). Note the CSV is read at
    class-definition (import) time, so the file must exist relative to
    the process working directory.
    """

    # spider name
    name = "urlGenerator"
    # domains the spider may crawl
    allowed_domains = ["livemint.com"]
    # start URLs come from the CSV; the original dead `start_urls = []`
    # assignment (immediately overwritten) has been removed
    file_name = '../../livemint_data_3.csv'
    df = pd.read_csv(file_name, encoding='iso-8859-1')
    start_urls = df['href'].tolist()

    # base query URL for the "people" sub-section listing pages
    base_url = "https://www.livemint.com/Query/lZy3FU0kP9Cso5deYypuDI/people.html?facet=subSection&page="

    def parse(self, response):
        """Default scrapy callback; currently just logs the start URLs.

        BUG FIX: the original used a Python 2 print *statement* and
        referenced the bare name ``start_urls``, which is a NameError
        inside a method — the class attribute must be reached via
        ``self``.
        """
        print(self.start_urls)
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
import re | ||
import tweepy | ||
from tweepy import OAuthHandler | ||
from textblob import TextBlob | ||
|
||
class TwitterClient(object):
    '''
    Generic Twitter client for tweet sentiment analysis.

    Wraps tweepy authentication / search and TextBlob polarity scoring.
    '''

    def __init__(self):
        '''
        Authenticate against the Twitter API and build a tweepy API handle.
        '''
        # keys and tokens from the Twitter Dev Console
        # SECURITY NOTE(review): credentials are hard-coded and committed to
        # source control — they should be revoked and loaded from the
        # environment or a config file instead.
        consumer_key = '5t56s0IVP6wt9nDYQt4V1vz9G'
        consumer_secret = 'RwrhzqZEGptj4FQEBS2Rxft38WiKNrEzxbE3WBkmATVGf1Vj40'
        access_token = '543284380-Ity3NkRNf80XnU6wCnrJCXZfIumrI4JrRUBx2VZZ'
        access_token_secret = '0shseAXCMKiH8JQXb4vzCCyBkOLNgNShRglgkxrceBDgz'

        # attempt authentication
        try:
            # create OAuthHandler object
            self.auth = OAuthHandler(consumer_key, consumer_secret)
            # set access token and secret
            self.auth.set_access_token(access_token, access_token_secret)
            # create tweepy API object to fetch tweets
            self.api = tweepy.API(self.auth)
        except Exception:
            # BUG FIX: the original bare `except:` also swallowed
            # SystemExit/KeyboardInterrupt; narrowed to Exception.
            print("Error: Authentication Failed")

    def clean_tweet(self, tweet):
        '''
        Utility function to clean tweet text by removing links, special
        characters and @-mentions using simple regex statements.

        BUG FIX: the pattern is now a raw string — the original
        non-raw literal relied on invalid escape sequences such as
        ``\\w`` and ``\\S``, which are DeprecationWarnings (and will be
        errors) in Python 3.
        '''
        return ' '.join(re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())

    def get_tweet_sentiment(self, tweet):
        '''
        Classify the sentiment of the passed tweet text as
        'positive' / 'neutral' / 'negative' using TextBlob's polarity.
        '''
        # create TextBlob object of the cleaned tweet text
        analysis = TextBlob(self.clean_tweet(tweet))
        # map polarity sign to a label
        if analysis.sentiment.polarity > 0:
            return 'positive'
        elif analysis.sentiment.polarity == 0:
            return 'neutral'
        else:
            return 'negative'

    def get_tweets(self, query, count=10):
        '''
        Fetch up to *count* tweets matching *query* and return a list of
        dicts with 'text' and 'sentiment' keys. Retweeted tweets are
        de-duplicated. Returns an empty list on API failure.
        '''
        # list of parsed tweets to return
        tweets = []

        try:
            # call the twitter api to fetch tweets
            fetched_tweets = self.api.search(q=query, count=count)

            # parse tweets one by one
            for tweet in fetched_tweets:
                # required params of a tweet
                parsed_tweet = {
                    'text': tweet.text,
                    'sentiment': self.get_tweet_sentiment(tweet.text),
                }

                if tweet.retweet_count > 0:
                    # tweet has retweets: append it only once
                    if parsed_tweet not in tweets:
                        tweets.append(parsed_tweet)
                else:
                    tweets.append(parsed_tweet)

            return tweets

        except tweepy.TweepError as e:
            # print error (if any)
            print("Error : " + str(e))
            # BUG FIX: the original fell off the end and implicitly
            # returned None here, which made callers crash on len(None);
            # return an empty list instead.
            return []
|
||
def main():
    """Fetch tweets about 'TCS Tata' and print sentiment percentages
    plus a sample of positive and negative tweets."""
    # creating object of TwitterClient Class
    api = TwitterClient()
    # calling function to get tweets
    tweets = api.get_tweets(query='TCS Tata', count=1000)

    # BUG FIX: the original divided by len(tweets) unconditionally,
    # raising ZeroDivisionError when the search returned no tweets (and
    # TypeError when get_tweets returned None after an API error).
    if not tweets:
        print("No tweets fetched; nothing to analyse.")
        return

    # picking positive tweets from tweets
    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']
    # percentage of positive tweets
    positive_percentage = 100 * len(ptweets) / len(tweets)
    print("Positive tweets percentage: %d" % positive_percentage)

    # picking negative tweets from tweets
    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']
    # percentage of negative tweets
    negative_percentage = 100 * len(ntweets) / len(tweets)
    print("Negative tweets percentage: %d" % negative_percentage)

    # printing first 10 positive tweets (original comment said 5 but the
    # slice was [:10]; the comment is corrected, behaviour kept)
    print("\n\nPositive tweets:")
    for tweet in ptweets[:10]:
        print(tweet['text'])

    # printing first 10 negative tweets
    print("\n\nNegative tweets:")
    for tweet in ntweets[:10]:
        print(tweet['text'])


if __name__ == "__main__":
    # calling main function
    main()
Submodule Twitter Analysis
added at
b9d5a1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
scrapy
pandas
tweepy
textblob
pyquery