-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
151 additions
and
2 deletions.
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
24 changes: 24 additions & 0 deletions
24
Business News Analysis/stocknews/stocknews/spiders/urlGenerator.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import scrapy | ||
import pandas as pd | ||
import re | ||
|
||
from stocknews.items import StocknewsItem | ||
|
||
class urlGenerator(scrapy.Spider):
    """Spider that crawls livemint.com article URLs loaded from a CSV dump.

    The start URLs are taken from the ``href`` column of a previously
    scraped CSV file (``livemint_data_3.csv``). Note the CSV is read at
    class-definition (import) time, so the file must exist relative to
    the process working directory.
    """

    # spider name
    name = "urlGenerator"
    # domains the spider may crawl
    allowed_domains = ["livemint.com"]
    # start URLs come from the CSV; the original dead `start_urls = []`
    # assignment (immediately overwritten) has been removed
    file_name = '../../livemint_data_3.csv'
    df = pd.read_csv(file_name, encoding='iso-8859-1')
    start_urls = df['href'].tolist()

    # base query URL for the "people" sub-section listing pages
    base_url = "https://www.livemint.com/Query/lZy3FU0kP9Cso5deYypuDI/people.html?facet=subSection&page="

    def parse(self, response):
        """Default scrapy callback; currently just logs the start URLs.

        BUG FIX: the original used a Python 2 print *statement* and
        referenced the bare name ``start_urls``, which is a NameError
        inside a method — the class attribute must be reached via
        ``self``.
        """
        print(self.start_urls)
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,119 @@ | ||
import re | ||
import tweepy | ||
from tweepy import OAuthHandler | ||
from textblob import TextBlob | ||
|
||
class TwitterClient(object):
    '''
    Generic Twitter client for tweet sentiment analysis.

    Wraps tweepy authentication / search and TextBlob polarity scoring.
    '''

    def __init__(self):
        '''
        Authenticate against the Twitter API and build a tweepy API handle.
        '''
        # keys and tokens from the Twitter Dev Console
        # SECURITY NOTE(review): credentials are hard-coded and committed to
        # source control — they should be revoked and loaded from the
        # environment or a config file instead.
        consumer_key = '5t56s0IVP6wt9nDYQt4V1vz9G'
        consumer_secret = 'RwrhzqZEGptj4FQEBS2Rxft38WiKNrEzxbE3WBkmATVGf1Vj40'
        access_token = '543284380-Ity3NkRNf80XnU6wCnrJCXZfIumrI4JrRUBx2VZZ'
        access_token_secret = '0shseAXCMKiH8JQXb4vzCCyBkOLNgNShRglgkxrceBDgz'

        # attempt authentication
        try:
            # create OAuthHandler object
            self.auth = OAuthHandler(consumer_key, consumer_secret)
            # set access token and secret
            self.auth.set_access_token(access_token, access_token_secret)
            # create tweepy API object to fetch tweets
            self.api = tweepy.API(self.auth)
        except Exception:
            # BUG FIX: the original bare `except:` also swallowed
            # SystemExit/KeyboardInterrupt; narrowed to Exception.
            print("Error: Authentication Failed")

    def clean_tweet(self, tweet):
        '''
        Utility function to clean tweet text by removing links, special
        characters and @-mentions using simple regex statements.

        BUG FIX: the pattern is now a raw string — the original
        non-raw literal relied on invalid escape sequences such as
        ``\\w`` and ``\\S``, which are DeprecationWarnings (and will be
        errors) in Python 3.
        '''
        return ' '.join(re.sub(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())

    def get_tweet_sentiment(self, tweet):
        '''
        Classify the sentiment of the passed tweet text as
        'positive' / 'neutral' / 'negative' using TextBlob's polarity.
        '''
        # create TextBlob object of the cleaned tweet text
        analysis = TextBlob(self.clean_tweet(tweet))
        # map polarity sign to a label
        if analysis.sentiment.polarity > 0:
            return 'positive'
        elif analysis.sentiment.polarity == 0:
            return 'neutral'
        else:
            return 'negative'

    def get_tweets(self, query, count=10):
        '''
        Fetch up to *count* tweets matching *query* and return a list of
        dicts with 'text' and 'sentiment' keys. Retweeted tweets are
        de-duplicated. Returns an empty list on API failure.
        '''
        # list of parsed tweets to return
        tweets = []

        try:
            # call the twitter api to fetch tweets
            fetched_tweets = self.api.search(q=query, count=count)

            # parse tweets one by one
            for tweet in fetched_tweets:
                # required params of a tweet
                parsed_tweet = {
                    'text': tweet.text,
                    'sentiment': self.get_tweet_sentiment(tweet.text),
                }

                if tweet.retweet_count > 0:
                    # tweet has retweets: append it only once
                    if parsed_tweet not in tweets:
                        tweets.append(parsed_tweet)
                else:
                    tweets.append(parsed_tweet)

            return tweets

        except tweepy.TweepError as e:
            # print error (if any)
            print("Error : " + str(e))
            # BUG FIX: the original fell off the end and implicitly
            # returned None here, which made callers crash on len(None);
            # return an empty list instead.
            return []
|
||
def main():
    """Fetch tweets about 'TCS Tata' and print sentiment percentages
    plus a sample of positive and negative tweets."""
    # creating object of TwitterClient Class
    api = TwitterClient()
    # calling function to get tweets
    tweets = api.get_tweets(query='TCS Tata', count=1000)

    # BUG FIX: the original divided by len(tweets) unconditionally,
    # raising ZeroDivisionError when the search returned no tweets (and
    # TypeError when get_tweets returned None after an API error).
    if not tweets:
        print("No tweets fetched; nothing to analyse.")
        return

    # picking positive tweets from tweets
    ptweets = [tweet for tweet in tweets if tweet['sentiment'] == 'positive']
    # percentage of positive tweets
    positive_percentage = 100 * len(ptweets) / len(tweets)
    print("Positive tweets percentage: %d" % positive_percentage)

    # picking negative tweets from tweets
    ntweets = [tweet for tweet in tweets if tweet['sentiment'] == 'negative']
    # percentage of negative tweets
    negative_percentage = 100 * len(ntweets) / len(tweets)
    print("Negative tweets percentage: %d" % negative_percentage)

    # printing first 10 positive tweets (original comment said 5 but the
    # slice was [:10]; the comment is corrected, behaviour kept)
    print("\n\nPositive tweets:")
    for tweet in ptweets[:10]:
        print(tweet['text'])

    # printing first 10 negative tweets
    print("\n\nNegative tweets:")
    for tweet in ntweets[:10]:
        print(tweet['text'])


if __name__ == "__main__":
    # calling main function
    main()
Submodule Twitter Analysis
added at
b9d5a1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
scrapy
pandas
tweepy
textblob
pyquery