-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrapper.py
88 lines (81 loc) · 4.63 KB
/
scrapper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Import required libraries
import pandas as pd
import numpy as np
import requests
import csv
import yfinance as yf
import datetime
import time
import io
# ---- WEB SCRAPER BEGIN -----
# Date window passed to the price-history download below
start = datetime.datetime(year=2015, month=10, day=1)
end = datetime.datetime(year=2020, month=10, day=31)
# Ticker symbols whose price history is downloaded. Kept as a
# whitespace-separated table and split into a list of strings; the order of
# the symbols is preserved.
Symbols = """
MMM ABT ABBV ACN ATVI AYI ADBE AMD AAP AES AET
AMG AFL A APD AKAM ALK ALB ARE ALXN ALGN ALLE
ADS LNT ALL GOOGL GOOG MO AMZN AEE AAL AEP
AXP AIG AMT AWK AMP ABC AME AMGN APH ADI ANDV
ANSS ANTM AON AOS APA AIV AAPL AMAT APTV ADM ARNC
AJG AIZ T ADSK ADP AZO AVB AVY BLL BAC BK
BAX BDX BBY BIIB BLK HRB BA BWA BXP BSX
BHF BMY AVGO CHRW CA COG CDNS CPB COF CAH CBOE
KMX CCL CAT CNC CNP CERN CF SCHW
CHTR CVX CMG CB CHD CI XEC CINF CTAS CSCO C CFG
CTXS CLX CME CMS KO CTSH CL CMCSA CMA CAG CXO COP
ED STZ COO GLW COST COTY CCI CSRA CSX CMI CVS DHI
DHR DRI DVA DE DAL XRAY DVN DLR DFS DISCA DISCK DISH
DG DLTR D DOV DTE DRE DUK DXC EMN ETN
EBAY ECL EIX EW EA EMR ETR EVHC EOG EQT EFX EQIX EQR
ESS EL ES RE EXC EXPE EXPD ESRX EXR XOM FFIV FB FAST
FRT FDX FIS FITB FE FISV FLIR FLS FLR FMC FL F FTV
FBHS BEN FCX GPS GRMN IT GD GE GIS GM GPC GILD
GPN GS GT GWW HAL HBI HOG HIG HAS HCA HP HSIC
HSY HES HPE HLT HOLX HD HON HRL HST HPQ HUM HBAN HII
IDXX INFO ITW ILMN IR INTC ICE IBM INCY IP IPG IFF INTU
ISRG IVZ IQV IRM JBHT SJM JNJ JCI JPM JNPR KSU K KEY
KMB KIM KMI KLAC KSS KHC KR LB LH LRCX LEG LEN
LLY LNC LKQ LMT L LOW LYB MTB MAC M MRO MPC MAR MMC MLM
MAS MA MAT MKC MCD MCK MDT MRK MET MTD MGM MCHP MU
MSFT MAA MHK TAP MDLZ MON MNST MCO MS MOS MSI NDAQ
NOV NAVI NTAP NFLX NWL NFX NEM NWSA NWS NEE NLSN NKE NI
JWN NSC NTRS NOC NCLH NRG NUE NVDA ORLY OXY OMC OKE
ORCL PCAR PKG PH PDCO PAYX PYPL PNR PBCT PEP PKI PRGO PFE
PCG PM PSX PNW PXD PNC RL PPG PPL PX PFG PG PGR
PLD PRU PEG PSA PHM PVH QRVO PWR QCOM DGX RRC RJF O
REG REGN RF RSG RMD RHI ROK COL ROP ROST RCL CRM SBAC
SCG SLB SNI STX SEE SRE SHW SIG SPG SWKS SLG SNA SO LUV
SPGI SWK SBUX STT SRCL SYK SYF SNPS SYY TROW TPR
TGT TEL FTI TXN TXT TMO TIF TWX TJX TSCO TDG TRV
TRIP FOXA FOX TSN UDR ULTA USB UAA UA UNP UAL UNH UPS URI
UHS UNM VFC VLO VAR VTR VRSN VRSK VZ VRTX V VNO
VMC WMT WBA DIS WM WAT WEC WFC WDC WU WRK WY WHR WMB
WLTW WYNN XEL XRX XLNX XL XYL YUM ZBH ZION ZTS
""".split()
# Download the price history for every ticker in one batched request.
# auto_adjust=False is passed explicitly: recent yfinance releases adjust
# prices by default and then omit the "Adj Close" field this script exports,
# which would leave nothing once "Close" is dropped below.
stock_final = yf.download(
    Symbols, start=start, end=end, progress=True, auto_adjust=False
)
# Discard the raw price/volume fields so that only "Adj Close" remains.
stock_final = stock_final.drop(columns=['Open', 'Close', 'High', 'Low', 'Volume'])
# Drop every remaining column (one per ticker) that has at least one missing
# value in the requested window.
stock_final = stock_final.dropna(how='any', axis=1)
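# Legacy per-symbol download loop, left commented out for reference; it is
# superseded by the batched yf.download call above.
# iterate over each symbol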
# for i in Symbols:
# # print the symbol which is being downloaded
# print( str(Symbols.index(i)) + str(' : ') + i, sep=',', end=',', flush=True)
# try:
# # download the stock price
# stock = []
# stock = yf.download(i,start=start, end=end, progress=False)
# # append the individual stock prices
# if len(stock) == 0:
# None
# else:
# stock['Name']=i
# stock.drop(columns=['Open', 'Close', 'High', 'Low', 'Volume'], axis=1, inplace=True)
# stock_final = stock_final.append(stock,sort=False)
# except Exception:
# None
# Remove any row that still contains a missing value. The result has to be
# assigned because dropna is not in-place by default, and only `how` is passed
# because recent pandas raises TypeError when `how` and `thresh` are both given.
stock_final = stock_final.dropna(how="any")
# Write the table to the working directory without the index column; the
# column header rows are kept.
stock_final.to_csv(r'scrapper.csv', index=False, header=True)
print("\n\n\nSuccessfully scraped.\nCheck root directory for the file \"scrapper.csv\" and add it to the Stock_Portfolio_Maker folder.")
print("Open scrapper.csv and delete the first row that says \"Adj Close\".")
# -------- WEB SCRAPER END ---------