diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..894a44c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,104 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a Python script from a template
+# before PyInstaller builds the exe, so as to inject date/other info into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
diff --git a/.ipynb_checkpoints/NN-checkpoint.ipynb b/.ipynb_checkpoints/NN-checkpoint.ipynb
deleted file mode 100644
index b286466..0000000
--- a/.ipynb_checkpoints/NN-checkpoint.ipynb
+++ /dev/null
@@ -1,1193 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 58,
- "metadata": {},
- "outputs": [],
- "source": [
- "import string \n",
- "import unicodedata\n",
- "import sys\n",
- "import collections\n",
- "import random\n",
- "import math\n",
- "import os\n",
- "from collections import Counter\n",
- "from ast import literal_eval\n",
- "import regex as re\n",
- "import pickle\n",
- "from functools import reduce\n",
- "from datetime import datetime \n",
- "\n",
- "import pandas as pd\n",
- "import numpy as np\n",
- "import matplotlib.pyplot as plt\n",
- "\n",
- "import xgboost as xgb\n",
- "import lightgbm as lgbm\n",
- "from IPython.display import display\n",
- "\n",
- "from sklearn.model_selection import train_test_split\n",
- "from sklearn.feature_extraction.text import TfidfVectorizer\n",
- "from sklearn.linear_model import LogisticRegression\n",
- "from sklearn.naive_bayes import BernoulliNB\n",
- "from sklearn.ensemble import RandomForestClassifier\n",
- "\n",
- "from nltk.corpus import stopwords\n",
- "from nltk import word_tokenize\n",
- "from nltk.stem import PorterStemmer\n",
- "from nltk.stem.wordnet import WordNetLemmatizer\n",
- "\n",
- "import tensorflow as tf\n",
- "from keras.models import Sequential, Model, load_model\n",
- "from keras.layers import ( Dense, Conv1D, Activation, MaxPool1D, \n",
- " Embedding, Flatten, Reshape, concatenate, \n",
- " Input, Dropout, LSTM, AveragePooling1D, Masking )\n",
- "from keras import optimizers\n",
- "from keras import backend as K\n",
- "from keras.callbacks import ModelCheckpoint, EarlyStopping\n",
- "import h5py as h5py\n",
- "\n",
- "%matplotlib inline"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 59,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Difference | \n",
- " date | \n",
- " nextClose | \n",
- " nextDay | \n",
- " prevClose | \n",
- " prevDay | \n",
- " text | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " -1.30 | \n",
- " 2011-06-08 | \n",
- " 376.55 | \n",
- " 2011-06-09 | \n",
- " 377.85 | \n",
- " 2011-06-07 | \n",
- " Airtel commences 3G services in J&K | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 5.35 | \n",
- " 2011-06-13 | \n",
- " 379.30 | \n",
- " 2011-06-14 | \n",
- " 373.95 | \n",
- " 2011-06-10 | \n",
- " Airtel dances to African tune, sees more 3G li... | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3.80 | \n",
- " 2011-06-16 | \n",
- " 380.55 | \n",
- " 2011-06-17 | \n",
- " 376.75 | \n",
- " 2011-06-15 | \n",
- " TCIL may approach company law board against Bh... | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 20.20 | \n",
- " 2011-06-17 | \n",
- " 389.85 | \n",
- " 2011-06-20 | \n",
- " 369.65 | \n",
- " 2011-06-16 | \n",
- " Malkani bullish on Bharti Airtel | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 20.20 | \n",
- " 2011-06-17 | \n",
- " 389.85 | \n",
- " 2011-06-20 | \n",
- " 369.65 | \n",
- " 2011-06-16 | \n",
- " Hold Bharti Airtel: Angel Broking | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Difference date nextClose nextDay prevClose prevDay \\\n",
- "0 -1.30 2011-06-08 376.55 2011-06-09 377.85 2011-06-07 \n",
- "1 5.35 2011-06-13 379.30 2011-06-14 373.95 2011-06-10 \n",
- "2 3.80 2011-06-16 380.55 2011-06-17 376.75 2011-06-15 \n",
- "3 20.20 2011-06-17 389.85 2011-06-20 369.65 2011-06-16 \n",
- "4 20.20 2011-06-17 389.85 2011-06-20 369.65 2011-06-16 \n",
- "\n",
- " text \n",
- "0 Airtel commences 3G services in J&K \n",
- "1 Airtel dances to African tune, sees more 3G li... \n",
- "2 TCIL may approach company law board against Bh... \n",
- "3 Malkani bullish on Bharti Airtel \n",
- "4 Hold Bharti Airtel: Angel Broking "
- ]
- },
- "execution_count": 59,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df = pd.DataFrame()\n",
- "\n",
- "for filename in os.listdir(os.path.join(os.getcwd(), 'datasets')):\n",
- " if filename[-3:] == 'csv':\n",
- " df = df.append(pd.read_csv(os.path.join(os.getcwd(), 'datasets', filename), sep='|'), ignore_index=True) \n",
- " \n",
- "# display(df.head())\n",
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 60,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Difference | \n",
- " nextClose | \n",
- " prevClose | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " count | \n",
- " 30045.000000 | \n",
- " 30045.000000 | \n",
- " 30045.000000 | \n",
- "
\n",
- " \n",
- " mean | \n",
- " -2.832062 | \n",
- " 1089.146976 | \n",
- " 1091.979038 | \n",
- "
\n",
- " \n",
- " std | \n",
- " 91.934622 | \n",
- " 775.223777 | \n",
- " 778.694248 | \n",
- "
\n",
- " \n",
- " min | \n",
- " -2648.650000 | \n",
- " 162.050000 | \n",
- " 155.900000 | \n",
- "
\n",
- " \n",
- " 25% | \n",
- " -12.100000 | \n",
- " 405.950000 | \n",
- " 405.850000 | \n",
- "
\n",
- " \n",
- " 50% | \n",
- " 0.950000 | \n",
- " 938.000000 | \n",
- " 934.250000 | \n",
- "
\n",
- " \n",
- " 75% | \n",
- " 14.900000 | \n",
- " 1436.400000 | \n",
- " 1445.050000 | \n",
- "
\n",
- " \n",
- " max | \n",
- " 303.050000 | \n",
- " 4365.900000 | \n",
- " 4359.850000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Difference nextClose prevClose\n",
- "count 30045.000000 30045.000000 30045.000000\n",
- "mean -2.832062 1089.146976 1091.979038\n",
- "std 91.934622 775.223777 778.694248\n",
- "min -2648.650000 162.050000 155.900000\n",
- "25% -12.100000 405.950000 405.850000\n",
- "50% 0.950000 938.000000 934.250000\n",
- "75% 14.900000 1436.400000 1445.050000\n",
- "max 303.050000 4365.900000 4359.850000"
- ]
- },
- "execution_count": 60,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.describe()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 61,
- "metadata": {},
- "outputs": [],
- "source": [
- "stops = stopwords.words('english')\n",
- "porter = PorterStemmer()\n",
- "lemma = WordNetLemmatizer()\n",
- "\n",
- "tbl = dict.fromkeys(i for i in range(sys.maxunicode)\n",
- " if unicodedata.category(chr(i)).startswith('P') or i == 36 or i == ord('`'))\n",
- "\n",
- "\n",
- "def remove_punctuation(text):\n",
- " '''\n",
- " From https://stackoverflow.com/questions/11066400/remove-punctuation-from-unicode-formatted-strings\n",
- "\n",
- " '''\n",
- " return text.translate(tbl)\n",
- "\n",
- "\n",
- "def remove_stopwords(text, ret_format='str'):\n",
- " tokens = filter(lambda x: x not in stops, map(porter.stem, word_tokenize(text)))\n",
- " if ret_format == 'list':\n",
- " return list(tokens)\n",
- " elif ret_format == 'str':\n",
- " return ' '.join(tokens)\n",
- " else:\n",
- " raise Exception('Invalid format')\n",
- "\n",
- "\n",
- "def restore_arr(a):\n",
- " '''\n",
- " Converts strings to python list\n",
- " \n",
- " params:\n",
- " a: String -> Input string to be converted to array\n",
- " return:\n",
- " list\n",
- " \n",
- " Usage with pandas:\n",
- " train_mod = pd.read_csv('modified_train.csv', converters={'description_norm': restore_arr})\n",
- " '''\n",
- " return [x.replace(\"'\", \"\") for x in a[:-1][1:].split(', ')]\n",
- " \n",
- " \n",
- "\n",
- "def restore_int_arr(a):\n",
- " return [int(x.replace(\"'\", \"\")) for x in a[:-1][1:].split(', ')]\n",
- " \n",
- "\n",
- "def restore_float_arr(a):\n",
- " ret = [float(x.replace(\"'\", \"\")) for x in a[:-1][1:].split(', ')]\n",
- " if len(ret) == 1:\n",
- " return ret[0]\n",
- " else:\n",
- " return ret\n",
- " \n",
- "def lemmatize(a):\n",
- " return [lemma.lemmatize(x) for x in a.split()]\n",
- "\n",
- "def remove_numbers(a):\n",
- " ans = []\n",
- " for s in a.split():\n",
- " try:\n",
- " g = int(s)\n",
- " except ValueError:\n",
- " ans.append(s)\n",
- " \n",
- " return ' '.join(ans)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 62,
- "metadata": {},
- "outputs": [],
- "source": [
- "df['text_norm'] = df['text']\\\n",
- " .apply(str.lower)\\\n",
- " .apply(remove_punctuation)\\\n",
- " .apply(remove_numbers)\\\n",
- " .apply(remove_stopwords)\\\n",
- " .apply(lambda x: x.split())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 63,
- "metadata": {},
- "outputs": [],
- "source": [
- "df['date'] = df['date'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d'))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 64,
- "metadata": {},
- "outputs": [],
- "source": [
- "df['target'] = (df['Difference'] > 0).astype('int')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": 72,
- "metadata": {
- "scrolled": true
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Difference | \n",
- " date | \n",
- " nextClose | \n",
- " nextDay | \n",
- " prevClose | \n",
- " prevDay | \n",
- " text | \n",
- " text_norm | \n",
- " target | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 2122 | \n",
- " -1.2 | \n",
- " 2017-01-02 | \n",
- " 304.45 | \n",
- " 2017-01-03 | \n",
- " 305.65 | \n",
- " 2016-12-30 | \n",
- " GSM telcos gain over 10mn subscribers in Nov; ... | \n",
- " [gsm, telco, gain, 10mn, subscrib, nov, idea, ... | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 2123 | \n",
- " 2.9 | \n",
- " 2017-01-03 | \n",
- " 313.90 | \n",
- " 2017-01-04 | \n",
- " 311.00 | \n",
- " 2017-01-02 | \n",
- " Tulsian's take on Bharat Financial, oil & gas ... | \n",
- " [tulsian, take, bharat, financi, oil, ga, co, ... | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2124 | \n",
- " 2.9 | \n",
- " 2017-01-03 | \n",
- " 313.90 | \n",
- " 2017-01-04 | \n",
- " 311.00 | \n",
- " 2017-01-02 | \n",
- " Tariff War:Airtel offers 3GB free 4G mthly dat... | \n",
- " [tariff, warairtel, offer, 3gb, free, 4g, mthl... | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2125 | \n",
- " 2.9 | \n",
- " 2017-01-03 | \n",
- " 313.90 | \n",
- " 2017-01-04 | \n",
- " 311.00 | \n",
- " 2017-01-02 | \n",
- " Airtel in discussions to buy Telenor's Indian ... | \n",
- " [airtel, discuss, buy, telenor, indian, busi] | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2126 | \n",
- " 2.9 | \n",
- " 2017-01-03 | \n",
- " 313.90 | \n",
- " 2017-01-04 | \n",
- " 311.00 | \n",
- " 2017-01-02 | \n",
- " Maximum call drop recorded on Aircel network i... | \n",
- " [maximum, call, drop, record, aircel, network,... | \n",
- " 1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Difference date nextClose nextDay prevClose prevDay \\\n",
- "2122 -1.2 2017-01-02 304.45 2017-01-03 305.65 2016-12-30 \n",
- "2123 2.9 2017-01-03 313.90 2017-01-04 311.00 2017-01-02 \n",
- "2124 2.9 2017-01-03 313.90 2017-01-04 311.00 2017-01-02 \n",
- "2125 2.9 2017-01-03 313.90 2017-01-04 311.00 2017-01-02 \n",
- "2126 2.9 2017-01-03 313.90 2017-01-04 311.00 2017-01-02 \n",
- "\n",
- " text \\\n",
- "2122 GSM telcos gain over 10mn subscribers in Nov; ... \n",
- "2123 Tulsian's take on Bharat Financial, oil & gas ... \n",
- "2124 Tariff War:Airtel offers 3GB free 4G mthly dat... \n",
- "2125 Airtel in discussions to buy Telenor's Indian ... \n",
- "2126 Maximum call drop recorded on Aircel network i... \n",
- "\n",
- " text_norm target \n",
- "2122 [gsm, telco, gain, 10mn, subscrib, nov, idea, ... 0 \n",
- "2123 [tulsian, take, bharat, financi, oil, ga, co, ... 1 \n",
- "2124 [tariff, warairtel, offer, 3gb, free, 4g, mthl... 1 \n",
- "2125 [airtel, discuss, buy, telenor, indian, busi] 1 \n",
- "2126 [maximum, call, drop, record, aircel, network,... 1 "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Difference | \n",
- " date | \n",
- " nextClose | \n",
- " nextDay | \n",
- " prevClose | \n",
- " prevDay | \n",
- " text | \n",
- " text_norm | \n",
- " target | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " -1.30 | \n",
- " 2011-06-08 | \n",
- " 376.55 | \n",
- " 2011-06-09 | \n",
- " 377.85 | \n",
- " 2011-06-07 | \n",
- " Airtel commences 3G services in J&K | \n",
- " [airtel, commenc, 3g, servic, jk] | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " 5.35 | \n",
- " 2011-06-13 | \n",
- " 379.30 | \n",
- " 2011-06-14 | \n",
- " 373.95 | \n",
- " 2011-06-10 | \n",
- " Airtel dances to African tune, sees more 3G li... | \n",
- " [airtel, danc, african, tune, see, 3g, licens] | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " 3.80 | \n",
- " 2011-06-16 | \n",
- " 380.55 | \n",
- " 2011-06-17 | \n",
- " 376.75 | \n",
- " 2011-06-15 | \n",
- " TCIL may approach company law board against Bh... | \n",
- " [tcil, may, approach, compani, law, board, bha... | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " 20.20 | \n",
- " 2011-06-17 | \n",
- " 389.85 | \n",
- " 2011-06-20 | \n",
- " 369.65 | \n",
- " 2011-06-16 | \n",
- " Malkani bullish on Bharti Airtel | \n",
- " [malkani, bullish, bharti, airtel] | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " 20.20 | \n",
- " 2011-06-17 | \n",
- " 389.85 | \n",
- " 2011-06-20 | \n",
- " 369.65 | \n",
- " 2011-06-16 | \n",
- " Hold Bharti Airtel: Angel Broking | \n",
- " [hold, bharti, airtel, angel, broke] | \n",
- " 1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Difference date nextClose nextDay prevClose prevDay \\\n",
- "0 -1.30 2011-06-08 376.55 2011-06-09 377.85 2011-06-07 \n",
- "1 5.35 2011-06-13 379.30 2011-06-14 373.95 2011-06-10 \n",
- "2 3.80 2011-06-16 380.55 2011-06-17 376.75 2011-06-15 \n",
- "3 20.20 2011-06-17 389.85 2011-06-20 369.65 2011-06-16 \n",
- "4 20.20 2011-06-17 389.85 2011-06-20 369.65 2011-06-16 \n",
- "\n",
- " text \\\n",
- "0 Airtel commences 3G services in J&K \n",
- "1 Airtel dances to African tune, sees more 3G li... \n",
- "2 TCIL may approach company law board against Bh... \n",
- "3 Malkani bullish on Bharti Airtel \n",
- "4 Hold Bharti Airtel: Angel Broking \n",
- "\n",
- " text_norm target \n",
- "0 [airtel, commenc, 3g, servic, jk] 0 \n",
- "1 [airtel, danc, african, tune, see, 3g, licens] 1 \n",
- "2 [tcil, may, approach, compani, law, board, bha... 1 \n",
- "3 [malkani, bullish, bharti, airtel] 1 \n",
- "4 [hold, bharti, airtel, angel, broke] 1 "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Difference | \n",
- " nextClose | \n",
- " prevClose | \n",
- " target | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " count | \n",
- " 2700.000000 | \n",
- " 2700.000000 | \n",
- " 2700.000000 | \n",
- " 2700.000000 | \n",
- "
\n",
- " \n",
- " mean | \n",
- " 2.496870 | \n",
- " 1181.912278 | \n",
- " 1179.415407 | \n",
- " 0.578519 | \n",
- "
\n",
- " \n",
- " std | \n",
- " 77.984853 | \n",
- " 877.235840 | \n",
- " 877.623915 | \n",
- " 0.493888 | \n",
- "
\n",
- " \n",
- " min | \n",
- " -1519.450000 | \n",
- " 251.100000 | \n",
- " 251.100000 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 25% | \n",
- " -7.312500 | \n",
- " 451.250000 | \n",
- " 449.950000 | \n",
- " 0.000000 | \n",
- "
\n",
- " \n",
- " 50% | \n",
- " 3.250000 | \n",
- " 920.625000 | \n",
- " 919.700000 | \n",
- " 1.000000 | \n",
- "
\n",
- " \n",
- " 75% | \n",
- " 19.850000 | \n",
- " 1693.900000 | \n",
- " 1698.700000 | \n",
- " 1.000000 | \n",
- "
\n",
- " \n",
- " max | \n",
- " 209.050000 | \n",
- " 3412.950000 | \n",
- " 3412.950000 | \n",
- " 1.000000 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Difference nextClose prevClose target\n",
- "count 2700.000000 2700.000000 2700.000000 2700.000000\n",
- "mean 2.496870 1181.912278 1179.415407 0.578519\n",
- "std 77.984853 877.235840 877.623915 0.493888\n",
- "min -1519.450000 251.100000 251.100000 0.000000\n",
- "25% -7.312500 451.250000 449.950000 0.000000\n",
- "50% 3.250000 920.625000 919.700000 1.000000\n",
- "75% 19.850000 1693.900000 1698.700000 1.000000\n",
- "max 209.050000 3412.950000 3412.950000 1.000000"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/html": [
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " Difference | \n",
- " date | \n",
- " nextClose | \n",
- " nextDay | \n",
- " prevClose | \n",
- " prevDay | \n",
- " text | \n",
- " text_norm | \n",
- " target | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 18943 | \n",
- " -0.95 | \n",
- " 2018-03-14 | \n",
- " 311.85 | \n",
- " 2018-03-15 | \n",
- " 312.80 | \n",
- " 2018-03-13 | \n",
- " Buy ICICI Bank, State Bank of India, Adani Ent... | \n",
- " [buy, icici, bank, state, bank, india, adani, ... | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 18944 | \n",
- " -0.95 | \n",
- " 2018-03-14 | \n",
- " 311.85 | \n",
- " 2018-03-15 | \n",
- " 312.80 | \n",
- " 2018-03-13 | \n",
- " News Highlights: Facebook bans far-right group... | \n",
- " [news, highlight, facebook, ban, farright, gro... | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 18945 | \n",
- " -0.95 | \n",
- " 2018-03-14 | \n",
- " 311.85 | \n",
- " 2018-03-15 | \n",
- " 312.80 | \n",
- " 2018-03-13 | \n",
- " Malaysia based IHH eyes YES Bank's Fortis stake | \n",
- " [malaysia, base, ihh, eye, ye, bank, forti, st... | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 18946 | \n",
- " -0.95 | \n",
- " 2018-03-14 | \n",
- " 311.85 | \n",
- " 2018-03-15 | \n",
- " 312.80 | \n",
- " 2018-03-13 | \n",
- " Nifty likely to remain rangebound between 10,3... | \n",
- " [nifti, like, remain, rangebound, ye, bank, gi... | \n",
- " 0 | \n",
- "
\n",
- " \n",
- " 18947 | \n",
- " -5.95 | \n",
- " 2018-03-15 | \n",
- " 312.90 | \n",
- " 2018-03-16 | \n",
- " 318.85 | \n",
- " 2018-03-14 | \n",
- " Buy Yes Bank, target Rs 408; bank well positio... | \n",
- " [buy, ye, bank, target, rs, bank, well, posit,... | \n",
- " 0 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Difference date nextClose nextDay prevClose prevDay \\\n",
- "18943 -0.95 2018-03-14 311.85 2018-03-15 312.80 2018-03-13 \n",
- "18944 -0.95 2018-03-14 311.85 2018-03-15 312.80 2018-03-13 \n",
- "18945 -0.95 2018-03-14 311.85 2018-03-15 312.80 2018-03-13 \n",
- "18946 -0.95 2018-03-14 311.85 2018-03-15 312.80 2018-03-13 \n",
- "18947 -5.95 2018-03-15 312.90 2018-03-16 318.85 2018-03-14 \n",
- "\n",
- " text \\\n",
- "18943 Buy ICICI Bank, State Bank of India, Adani Ent... \n",
- "18944 News Highlights: Facebook bans far-right group... \n",
- "18945 Malaysia based IHH eyes YES Bank's Fortis stake \n",
- "18946 Nifty likely to remain rangebound between 10,3... \n",
- "18947 Buy Yes Bank, target Rs 408; bank well positio... \n",
- "\n",
- " text_norm target \n",
- "18943 [buy, icici, bank, state, bank, india, adani, ... 0 \n",
- "18944 [news, highlight, facebook, ban, farright, gro... 0 \n",
- "18945 [malaysia, base, ihh, eye, ye, bank, forti, st... 0 \n",
- "18946 [nifti, like, remain, rangebound, ye, bank, gi... 0 \n",
- "18947 [buy, ye, bank, target, rs, bank, well, posit,... 0 "
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "last_date = datetime.strptime('2016 Dec 31', '%Y %b %d')\n",
- "\n",
- "# test = df[last_date < df['date']]\n",
- "# train = df[last_date >= df['date']]\n",
- "train = df\n",
- "\n",
- "display(test.head())\n",
- "display(train.head())\n",
- "display(test.describe())\n",
- "display(test.tail())"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 82,
- "metadata": {},
- "outputs": [],
- "source": [
- "params = {\n",
- " 'vocab_size': 2000,\n",
- " 'PAD': 0,\n",
- " 'UNK': 1,\n",
- " 'maxlen': df['text_norm'].map(len).max(),\n",
- " 'kernels': (3, 3, 3,),\n",
- " 'num_filters': (2, 2, 2),\n",
- " 'hidden_dims': 64,\n",
- " 'batch_size': 32,\n",
- " 'embedding_size': 64,\n",
- " 'pool_size': 2,\n",
- " 'threshold': 0.5,\n",
- " 'LSTM_units': 64\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 83,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "'10 most common words'"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "[('bank', 9174),\n",
- " ('buy', 6843),\n",
- " ('rs', 6040),\n",
- " ('say', 2813),\n",
- " ('sukhani', 2716),\n",
- " ('tata', 2467),\n",
- " ('infosi', 2400),\n",
- " ('sell', 2399),\n",
- " ('sbi', 2207),\n",
- " ('icici', 2187)]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "'Number of unique tokens'"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "data": {
- "text/plain": [
- "8521"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "vocabulary = dict()\n",
- "counter = Counter()\n",
- "\n",
- "for sent in train['text_norm']:\n",
- " counter.update(sent)\n",
- " \n",
- "display(\"10 most common words\")\n",
- "display(counter.most_common(10)) \n",
- "display(\"Number of unique tokens\")\n",
- "display(len(counter.keys()))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 84,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Most common words + (PAD and UNK) [['PAD', 299096], ['UNK', 14734], ('bank', 9174), ('buy', 6843), ('rs', 6040), ('say', 2813), ('sukhani', 2716), ('tata', 2467)]\n",
- "Dataset \n",
- " 0 [20, 1, 729, 113, 730, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
- "1 [20, 1, 1, 1, 41, 729, 1, 0, 0, 0, 0, 0, 0, 0,...\n",
- "2 [1, 12, 1404, 181, 1405, 160, 24, 20, 0, 0, 0,...\n",
- "3 [1406, 128, 24, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
- "4 [33, 24, 20, 131, 162, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
- "Name: description_vectors, dtype: object\n"
- ]
- }
- ],
- "source": [
- "unk_count = 0\n",
- "pad_count = 0\n",
- "\n",
- "def convert_to_dict(arr, maxlen, dictionary, pad=True):\n",
- " global unk_count, pad_count\n",
- " \n",
- " if pad:\n",
- "# ret = np.full(shape=(maxlen, ), fill_value=params['PAD'])\n",
- " ret = [params['PAD'] for x in range(maxlen)]\n",
- " else:\n",
- " ret = np.full(shape=(len(arr) + 1, ), fill_value=params['PAD'])\n",
- " \n",
- " for i, word in enumerate(arr):\n",
- " if word in dictionary:\n",
- " ret[i] = dictionary[word]\n",
- " else:\n",
- " ret[i] = params['UNK']\n",
- " unk_count += 1\n",
- " \n",
- " pad_count += maxlen - len(arr)\n",
- " return ret\n",
- " \n",
- "\n",
- "def create_dataset(cnt, train):\n",
- " global pad_count, unk_count\n",
- " \n",
- " count = [['PAD', -2], ['UNK', -1]]\n",
- " count.extend(cnt.most_common(params['vocab_size'] - 2))\n",
- " dictionary = dict()\n",
- " for word, _ in count:\n",
- " dictionary[word] = len(dictionary)\n",
- " \n",
- " reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys()))\n",
- " series = train['text_norm'].apply(convert_to_dict, args=(params['maxlen'], dictionary))\n",
- " count[0][-1] = pad_count\n",
- " count[1][-1] = unk_count\n",
- " return series, dictionary, reverse_dictionary, count\n",
- "\n",
- "train['description_vectors'], word_dict, word_rev_dict, count = create_dataset(counter, train)\n",
- "print('Most common words + (PAD and UNK) ', count[:8])\n",
- "print('Dataset','\\n', train['description_vectors'][:5])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 85,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(27040, 17) (3005, 17) (27040,)\n"
- ]
- }
- ],
- "source": [
- "desc = np.array([np.array(x) for x in train['description_vectors']])\n",
- "\n",
- "x_train, x_test, y_train, y_test = train_test_split(\n",
- " desc, \n",
- " train['Difference'], \n",
- " test_size=0.1, \n",
- " random_state=4)\n",
- "\n",
- "print(x_train.shape, x_test.shape, y_train.shape)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 86,
- "metadata": {},
- "outputs": [],
- "source": [
- "convolutions = list()\n",
- "\n",
- "input_shape = (params['maxlen'], )\n",
- "conv_input = Input(shape=input_shape)\n",
- "\n",
- "embed = Embedding(params['vocab_size'],\n",
- " params['embedding_size'],\n",
- " input_length=params['maxlen'],\n",
- " name='Embedding'\n",
- " )(conv_input)\n",
- "\n",
- "\n",
- "embed = Dropout(0.5, name='Dropout_1')(embed)\n",
- "\n",
- "for i, size in enumerate(params['kernels']):\n",
- " conv = Conv1D(filters=params['num_filters'][i], kernel_size=size, padding='same', activation='relu')(embed)\n",
- " conv = MaxPool1D(pool_size=params['pool_size'])(conv)\n",
- " conv = Flatten()(conv)\n",
- " convolutions.append(conv)\n",
- " \n",
- "out = concatenate(convolutions)\n",
- "out = Dropout(0.4, name='Dropout_2')(out)\n",
- "out = Dense(params['hidden_dims'], activation='relu', name='Dense_hidden')(out)\n",
- "out = Dense(1, activation='softmax', name='Dense_final')(out)\n",
- "\n",
- "model = Model(inputs=conv_input, outputs=out)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Train on 27040 samples, validate on 3005 samples\n",
- "Epoch 1/3\n",
- "27040/27040 [==============================] - 11s - loss: 59.6068 - acc: 9.2456e-04 - val_loss: 74.4581 - val_acc: 3.3278e-04\n",
- "Epoch 2/3\n",
- "27040/27040 [==============================] - 10s - loss: 59.6068 - acc: 9.2456e-04 - val_loss: 74.4581 - val_acc: 3.3278e-04\n",
- "Epoch 3/3\n",
- "10656/27040 [==========>...................] - ETA: 5s - loss: 40.8227 - acc: 0.0012"
- ]
- }
- ],
- "source": [
- "model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
- "model.fit(x_train, y_train, \n",
- " batch_size=params['batch_size'], \n",
- " validation_data=(x_test, y_test),\n",
- " verbose=1, epochs=3, callbacks=[ModelCheckpoint('model_cnn_v1.keras', save_best_only=True)])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [
- {
- "ename": "ModuleNotFoundError",
- "evalue": "No module named 'gensim'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mfrom\u001b[0m \u001b[0mgensim\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodels\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mWord2Vec\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'gensim'"
- ]
- }
- ],
- "source": [
- "from gensim.models import Word2Vec"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 111,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- }
- ],
- "source": [
- "from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer\n",
- "count_vect = TfidfVectorizer()\n",
- "X = count_vect.fit_transform(train['text'])\n",
- "X.toarray()\n",
- "print (type(X))\n",
- "\n",
- "x_train, x_test, y_train, y_test = train_test_split(\n",
- " X, \n",
- " train['target'], \n",
- " test_size=0.1, \n",
- " random_state=4)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 112,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[0]\ttrain-error:0.462833\tvalid-error:0.478536\n",
- "Multiple eval metrics have been passed: 'valid-error' will be used for early stopping.\n",
- "\n",
- "Will train until valid-error hasn't improved in 50 rounds.\n",
- "[20]\ttrain-error:0.421709\tvalid-error:0.47188\n",
- "[40]\ttrain-error:0.40233\tvalid-error:0.462562\n",
- "[60]\ttrain-error:0.38824\tvalid-error:0.463561\n",
- "[80]\ttrain-error:0.376331\tvalid-error:0.464226\n",
- "[100]\ttrain-error:0.370932\tvalid-error:0.464892\n",
- "Stopping. Best iteration:\n",
- "[69]\ttrain-error:0.383617\tvalid-error:0.461231\n",
- "\n"
- ]
- }
- ],
- "source": [
- "d_train = xgb.DMatrix(x_train, label=y_train)\n",
- "d_valid = xgb.DMatrix(x_test, label=y_test)\n",
- "\n",
- "xgb_params = {\n",
- " 'eta': 0.12,\n",
- " 'objective': 'binary:logistic',\n",
- " 'eval_metric': 'error',\n",
- " 'max-depth': 6,\n",
- " 'gamma': 5,\n",
- " 'subsample': 0.76,\n",
- " 'colsample_bytree': 0.8\n",
- "}\n",
- "\n",
- "watchlist = [(d_train, 'train'), (d_valid, 'valid')]\n",
- "\n",
- "xgb_model = xgb.train(xgb_params, d_train, 500, watchlist, verbose_eval=20, early_stopping_rounds=50)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 113,
- "metadata": {},
- "outputs": [],
- "source": [
- "p = xgb_model.predict(d_train)\n",
- "mask = p > 0.5\n",
- "\n",
- "p[mask] = 1\n",
- "p[np.logical_not(mask)] = 0"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 115,
- "metadata": {},
- "outputs": [
- {
- "ename": "ValueError",
- "evalue": "Lengths must match to compare",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mp\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mp\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0my_train\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0mp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m/usr/local/Cellar/python3/3.6.1/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/pandas/core/ops.py\u001b[0m in \u001b[0;36mwrapper\u001b[0;34m(self, other, axis)\u001b[0m\n\u001b[1;32m 820\u001b[0m if (not lib.isscalar(lib.item_from_zerodim(other)) and\n\u001b[1;32m 821\u001b[0m len(self) != len(other)):\n\u001b[0;32m--> 822\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'Lengths must match to compare'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 823\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 824\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mother\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mABCPeriodIndex\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mValueError\u001b[0m: Lengths must match to compare"
- ]
- }
- ],
- "source": [
- "p[p == y_train].shape[0]/p.shape[0]"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/.ipynb_checkpoints/merge-checkpoint.ipynb b/.ipynb_checkpoints/merge-checkpoint.ipynb
deleted file mode 100644
index f92e362..0000000
--- a/.ipynb_checkpoints/merge-checkpoint.ipynb
+++ /dev/null
@@ -1,181 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "from collections import defaultdict\n",
- "from datetime import datetime\n",
- "from operator import itemgetter\n",
- "import csv\n",
- "import os\n",
- "\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 48,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'BPCL', 'INFY', 'LT', 'SBIN', 'M&M'}\n"
- ]
- }
- ],
- "source": [
- "dpath = os.path.join(os.getcwd(), 'datasets', 'EXTRA')\n",
- "l = os.listdir(dpath)\n",
- "names = set([x[24:-7] for x in l])\n",
- "print(names)\n",
- "d = {}\n",
- "\n",
- "for x in names:\n",
- " d[x] = pd.DataFrame()\n",
- " \n",
- "for x in l:\n",
- " df = pd.read_csv(os.path.join(dpath, x))\n",
- " d[x[24:-7]] = d[x[24:-7]].append(df, ignore_index=True)\n",
- "\n",
- "for x in names:\n",
- " d[x].to_csv(os.path.join(os.getcwd(), 'datasets/NSE', '{}.csv'.format(x)), index=False)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 49,
- "metadata": {},
- "outputs": [],
- "source": [
- "for f in names:\n",
- " nseName = os.path.join(os.getcwd(), 'datasets/NSE', '{}.csv'.format(f))\n",
- " mcName = os.path.join(os.getcwd(), 'datasets/MC', 'MC{}.csv'.format(f))\n",
- " nse = pd.read_csv(nseName)\n",
- " mc = pd.read_csv(mcName, sep='|')\n",
- "\n",
- " # print(nse)\n",
- " # print(mc.head())\n",
- " nse['Date'] = nse['Date'].apply(lambda x: datetime.strptime(x, '%d-%b-%Y'))\n",
- " mc['date'] = mc['date'].apply(lambda x: x.strip()).apply(lambda x: datetime.strptime(x, '%d %b %Y'))\n",
- " mc = mc.sort_values(by='date')\n",
- " # print(nse.head())\n",
- " # print(mc.head())\n",
- " \n",
- " mydick = defaultdict(list)\n",
- " Date = [[row['Date'], row['Close Price']] for i, row in nse.iterrows()]\n",
- "\n",
- " for date, text in zip(mc['date'], mc['text']):\n",
- " mydick['date'].append(date)\n",
- " mydick['text'].append(text)\n",
- " myDate = Date[:]\n",
- " myDate.append([date, 0])\n",
- " myDate.sort(key=itemgetter(0))\n",
- "\n",
- " ind = myDate.index([date, 0])\n",
- " try:\n",
- " prevDay = myDate[ind - 1]\n",
- " except IndexError:\n",
- " prevDay = myDate[ind + 1]\n",
- " try:\n",
- " nextDay = myDate[ind + 1]\n",
- " except IndexError:\n",
- " nextDay = myDate[ind - 1]\n",
- " try:\n",
- " if prevDay[0] == date:\n",
- " prevDay = myDate[ind - 2]\n",
- " if nextDay[0] == date:\n",
- " nextDay = myDate[ind + 2]\n",
- " except IndexError:\n",
- " pass\n",
- " mydick['prevDay'].append(prevDay[0])\n",
- " mydick['prevClose'].append(prevDay[1])\n",
- " mydick['nextDay'].append(nextDay[0])\n",
- " mydick['nextClose'].append(nextDay[1])\n",
- " mydick['Difference'].append(nextDay[1] - prevDay[1])\n",
- " \n",
- " \n",
- " df = pd.DataFrame(mydick)\n",
- " df.to_csv(os.path.join(os.getcwd(),'datasets/Merge/MERGE' + f + '.csv'), index=False, sep='|')\n",
- "\n",
- " "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "metadata": {},
- "outputs": [],
- "source": [
- "mydick = defaultdict(list)\n",
- "Date = [[row['Date'], row['Close Price']] for i, row in nse.iterrows()]\n",
- "\n",
- "for date, text in zip(mc['date'], mc['text']):\n",
- " mydick['date'].append(date)\n",
- " mydick['text'].append(text)\n",
- " myDate = Date[:]\n",
- " myDate.append([date, 0])\n",
- " myDate.sort(key=itemgetter(0))\n",
- "\n",
- " ind = myDate.index([date, 0])\n",
- " try:\n",
- " prevDay = myDate[ind - 1]\n",
- " except IndexError:\n",
- " prevDay = myDate[ind + 1]\n",
- " try:\n",
- " nextDay = myDate[ind + 1]\n",
- " except IndexError:\n",
- " nextDay = myDate[ind - 1]\n",
- " try:\n",
- " if prevDay[0] == date:\n",
- " prevDay = myDate[ind - 2]\n",
- " if nextDay[0] == date:\n",
- " nextDay = myDate[ind + 2]\n",
- " except IndexError:\n",
- " pass\n",
- " mydick['prevDay'].append(prevDay[0])\n",
- " mydick['prevClose'].append(prevDay[1])\n",
- " mydick['nextDay'].append(nextDay[0])\n",
- " mydick['nextClose'].append(nextDay[1])\n",
- " mydick['Difference'].append(nextDay[1] - prevDay[1])\n",
- "\n",
- "# print(mydick['Difference'])\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "metadata": {},
- "outputs": [],
- "source": [
- "df = pd.DataFrame(mydick)\n",
- "df.to_csv('Merge/MERGEAIRTEL.csv', index=False, sep='|')\n"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.3"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}