Skip to content

Commit

Permalink
Added twitter analysis files
Browse files Browse the repository at this point in the history
  • Loading branch information
shubham76 committed Mar 26, 2018
1 parent 95484ff commit 6f11019
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 2 deletions.
2 changes: 0 additions & 2 deletions Business News Analysis/requirements.txt

This file was deleted.

1 change: 1 addition & 0 deletions Business News Analysis/stocknews/ReadME.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@ To run on local machine:
`scrapy crawl livemint_spider`

To generate a CSV file:

`scrapy crawl livemint_spider -o file.csv -t csv`
24 changes: 24 additions & 0 deletions Business News Analysis/stocknews/stocknews/spiders/urlGenerator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import scrapy
import pandas as pd
import re

from stocknews.items import StocknewsItem

class urlGenerator(scrapy.Spider):
    """Spider that replays article URLs previously collected into a CSV.

    Seed URLs are taken from the ``href`` column of ``livemint_data_3.csv``.

    NOTE(review): the CSV is read at class-definition (import) time, so a
    missing file makes the whole module fail to import. Consider moving the
    read into ``start_requests()``.
    """

    # spider name used by `scrapy crawl urlGenerator`
    name = "urlGenerator"
    # restrict crawling to livemint.com
    allowed_domains = ["livemint.com"]
    # seed URLs come from a previously scraped CSV (relative to cwd at import)
    file_name = '../../livemint_data_3.csv'
    df = pd.read_csv(file_name, encoding='iso-8859-1')
    start_urls = df['href'].tolist()

    # paginated query endpoint (currently unused by parse)
    base_url = "https://www.livemint.com/Query/lZy3FU0kP9Cso5deYypuDI/people.html?facet=subSection&page="

    def parse(self, response):
        # BUG FIX: original was `print start_urls` — Python 2 print-statement
        # syntax (SyntaxError on Python 3), and even on Python 2 it would be
        # a NameError because start_urls is a class attribute, not a local.
        print(self.start_urls)


Binary file not shown.
119 changes: 119 additions & 0 deletions Business News Analysis/twt_sentiment_analyser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
import re
import tweepy
from tweepy import OAuthHandler
from textblob import TextBlob

class TwitterClient(object):
    '''
    Generic Twitter client for fetching tweets and classifying their
    sentiment with TextBlob.
    '''

    def __init__(self):
        '''
        Authenticate against the Twitter API and build a tweepy client.

        On failure, prints an error message; ``self.api`` is then left
        unset and later calls will raise AttributeError.
        '''
        # SECURITY(review): credentials are hardcoded in source. These keys
        # are compromised the moment the file is shared; move them to
        # environment variables or a config file outside version control.
        consumer_key = '5t56s0IVP6wt9nDYQt4V1vz9G'
        consumer_secret = 'RwrhzqZEGptj4FQEBS2Rxft38WiKNrEzxbE3WBkmATVGf1Vj40'
        access_token = '543284380-Ity3NkRNf80XnU6wCnrJCXZfIumrI4JrRUBx2VZZ'
        access_token_secret = '0shseAXCMKiH8JQXb4vzCCyBkOLNgNShRglgkxrceBDgz'

        # attempt authentication
        try:
            # create OAuthHandler object
            self.auth = OAuthHandler(consumer_key, consumer_secret)
            # set access token and secret
            self.auth.set_access_token(access_token, access_token_secret)
            # create tweepy API object to fetch tweets
            self.api = tweepy.API(self.auth)
        except Exception as e:
            # FIX: was a bare `except:` which also swallows KeyboardInterrupt
            # and hides the actual cause. Catch Exception and show it.
            print("Error: Authentication Failed ({})".format(e))

    def clean_tweet(self, tweet):
        '''
        Return *tweet* with @mentions, URLs and special characters removed,
        and whitespace collapsed to single spaces.
        '''
        # Raw string so the regex escapes (\w, \S) are explicit and safe on
        # future Python versions; the pattern itself is unchanged.
        return ' '.join(re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())

    def get_tweet_sentiment(self, tweet):
        '''
        Classify *tweet* as 'positive', 'neutral' or 'negative' using
        TextBlob's polarity score.
        '''
        # create TextBlob object of the cleaned tweet text
        analysis = TextBlob(self.clean_tweet(tweet))
        # polarity > 0 positive, == 0 neutral, < 0 negative
        if analysis.sentiment.polarity > 0:
            return 'positive'
        elif analysis.sentiment.polarity == 0:
            return 'neutral'
        else:
            return 'negative'

    def get_tweets(self, query, count=10):
        '''
        Fetch up to *count* tweets matching *query* and return a list of
        dicts with 'text' and 'sentiment' keys.

        Always returns a list; on API error it prints the error and returns
        whatever was collected so far (possibly empty).
        '''
        # list of parsed tweets to return
        tweets = []

        try:
            # call twitter api to fetch tweets
            fetched_tweets = self.api.search(q=query, count=count)

            # parse tweets one by one
            for tweet in fetched_tweets:
                parsed_tweet = {
                    'text': tweet.text,
                    'sentiment': self.get_tweet_sentiment(tweet.text),
                }

                # retweets of the same text may appear more than once;
                # only keep a duplicate-free list in that case
                if tweet.retweet_count > 0:
                    if parsed_tweet not in tweets:
                        tweets.append(parsed_tweet)
                else:
                    tweets.append(parsed_tweet)

        except tweepy.TweepError as e:
            # FIX: original fell off the end after printing, implicitly
            # returning None and crashing callers that do len(tweets).
            print("Error : " + str(e))

        return tweets

def main():
    """Fetch tweets about 'TCS Tata' and print a sentiment breakdown."""
    # creating object of TwitterClient class
    api = TwitterClient()
    # calling function to get tweets
    tweets = api.get_tweets(query='TCS Tata', count=1000)

    # FIX: guard against an empty (or failed) fetch — the original divided
    # by len(tweets) and raised ZeroDivisionError (or TypeError when
    # get_tweets returned None on API error).
    if not tweets:
        print("No tweets fetched; nothing to analyse.")
        return

    # picking positive tweets from tweets
    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']
    # percentage of positive tweets
    positive_percentage = 100 * len(ptweets) / len(tweets)
    print("Positive tweets percentage: %d" % positive_percentage)

    # picking negative tweets from tweets
    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']
    # percentage of negative tweets
    negative_percentage = 100 * len(ntweets) / len(tweets)
    print("Negative tweets percentage: %d" % negative_percentage)

    # printing first 10 positive tweets (comment previously said 5)
    print("\n\nPositive tweets:")
    for tweet in ptweets[:10]:
        print(tweet['text'])

    # printing first 10 negative tweets (comment previously said 5)
    print("\n\nNegative tweets:")
    for tweet in ntweets[:10]:
        print(tweet['text'])

if __name__ == "__main__":
    # script entry point
    main()
1 change: 1 addition & 0 deletions Twitter Analysis
Submodule Twitter Analysis added at b9d5a1
6 changes: 6 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
scrapy
pandas
tweepy
textblob
pyquery

0 comments on commit 6f11019

Please sign in to comment.