diff --git a/app.py b/app.py
index c48f6d9..9d6559a 100644
--- a/app.py
+++ b/app.py
@@ -1,9 +1,64 @@
+import pickle
+from tensorflow.keras.models import load_model
+from tensorflow.keras.preprocessing import sequence
+
from flask import Flask, render_template, request
+from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+
app = Flask(__name__)
+
+model = None  # Keras model; assigned by load_keras_model(), stays None if loading fails
+tokenizer = None  # Unpickled tokenizer; assigned by load_tokenizer(), stays None if loading fails
+loaded = False # Set to True after the first load attempt so the artifacts are not reloaded on every request
+
+def load_keras_model():  # Load the saved Keras model into the module-level `model`.
+ global model
+ try:
+ model = load_model('models/uci_sentimentanalysis.h5')  # relative path: resolved against the current working directory
+ print("Model loaded successfully.")
+ except Exception as e:  # best-effort: report the failure and leave `model` as it was (None at startup)
+ print(f"Error loading model: {e}")
+
+def load_tokenizer():  # Unpickle the saved tokenizer into the module-level `tokenizer`.
+ global tokenizer
+ try:
+ with open('models/tokenizer.pickle', 'rb') as handle:  # relative path: resolved against the current working directory
+ tokenizer = pickle.load(handle)  # NOTE: unpickling can execute arbitrary code; only ship/load trusted files
+ print("Tokenizer loaded successfully.")
+ except Exception as e:  # best-effort: report the failure and leave `tokenizer` as it was (None at startup)
+ print(f"Error loading tokenizer: {e}")
+
+# before_first_request is not available in the Flask version I have installed (it was removed in Flask 2.3), so the artifacts are loaded lazily on the first request instead
+@app.before_request
+def before_request():  # Runs before every request; loads the model and tokenizer on the first one only.
+ global loaded
+ if not loaded:
+ load_keras_model()
+ load_tokenizer()
+ loaded = True # Set even when a loader failed (they swallow their errors), so a failed load is not retried
+
+def sentiment_analysis(input_text):  # Score `input_text` with the custom Keras model; returns a float rounded to 2 decimals.
+ if tokenizer is None or model is None:
+ return "Model or tokenizer not loaded."  # note: callers get an error string, not a number, when artifacts are missing
+
+ user_sequences = tokenizer.texts_to_sequences([input_text])  # wrap the text in a list: a batch of one
+ user_sequences_matrix = sequence.pad_sequences(user_sequences, maxlen=1225)  # NOTE(review): 1225 is presumably the padded length used in training — confirm
+ prediction = model.predict(user_sequences_matrix)
+
+ return round(float(prediction[0][0]), 2)  # first output value of the single sample
+
@app.route("/", methods=["GET", "POST"])
def index():
- # TODO: Write the code that calls the sentiment analysis functions here.
- # hint: use request.method == "POST"
- return render_template('form.html')
+ sentiment = {}  # stays empty on GET or blank input; passed to the template as-is
+ if request.method == "POST":
+ text = request.form.get("user_text") # Form field "user_text"; None when the field is absent
+ if text:
+ analyzer = SentimentIntensityAnalyzer()  # NOTE(review): rebuilt on every POST; could be created once at module level
+ sentiment = analyzer.polarity_scores(text) # VADER analysis; replaces the empty dict
+ sentiment["custom model positive"] = sentiment_analysis(text) # Custom model score, or an error string if the model is not loaded
+
+ return render_template('form.html', sentiment=sentiment)
+
if __name__ == "__main__":
- app.run()
+    app.run()  # Do not hard-code debug=True (it exposes the Werkzeug debugger); set FLASK_DEBUG=1 to enable debug mode locally
+
diff --git a/models/tokenizer.pickle b/models/tokenizer.pickle
new file mode 100644
index 0000000..bcc876a
Binary files /dev/null and b/models/tokenizer.pickle differ
diff --git a/models/uci_sentimentanalysis.h5 b/models/uci_sentimentanalysis.h5
new file mode 100644
index 0000000..9d39ff4
Binary files /dev/null and b/models/uci_sentimentanalysis.h5 differ
diff --git a/notebooks/US_02.ipynb b/notebooks/US_02.ipynb
new file mode 100644
index 0000000..9fd26f0
--- /dev/null
+++ b/notebooks/US_02.ipynb
@@ -0,0 +1,577 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "language_info": {
+ "name": "python"
+ }
+ },
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "wDR9jCIVpck3",
+ "outputId": "f2d5cb78-b1cc-49b0-ed4f-8ca8f5d349c6"
+ },
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
+ " Dload Upload Total Spent Left Speed\n",
+ "100 84188 0 84188 0 0 143k 0 --:--:-- --:--:-- --:--:-- 143k\n",
+ "Archive: uci-labelled-sentences.zip\n",
+ " creating: sentiment labelled sentences/\n",
+ " inflating: sentiment labelled sentences/.DS_Store \n",
+ " creating: __MACOSX/\n",
+ " creating: __MACOSX/sentiment labelled sentences/\n",
+ " inflating: __MACOSX/sentiment labelled sentences/._.DS_Store \n",
+ " inflating: sentiment labelled sentences/amazon_cells_labelled.txt \n",
+ " inflating: sentiment labelled sentences/imdb_labelled.txt \n",
+ " inflating: __MACOSX/sentiment labelled sentences/._imdb_labelled.txt \n",
+ " inflating: sentiment labelled sentences/readme.txt \n",
+ " inflating: __MACOSX/sentiment labelled sentences/._readme.txt \n",
+ " inflating: sentiment labelled sentences/yelp_labelled.txt \n",
+ " inflating: __MACOSX/._sentiment labelled sentences \n"
+ ]
+ }
+ ],
+ "source": [
+ "# download dataset from the UCI website\n",
+ "!curl -o uci-labelled-sentences.zip https://archive.ics.uci.edu/static/public/331/sentiment+labelled+sentences.zip\n",
+ "\n",
+ "# unzip dataset in Colab\n",
+ "!unzip uci-labelled-sentences.zip"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import pandas as pd\n",
+ "import pickle\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from tensorflow.keras.preprocessing.text import Tokenizer\n",
+ "from keras.preprocessing.sequence import pad_sequences\n",
+ "from keras.models import Sequential\n",
+ "from keras.layers import Dense, Embedding, LSTM\n",
+ "from keras.callbacks import EarlyStopping"
+ ],
+ "metadata": {
+ "id": "hl25OnZ7pmAz"
+ },
+ "execution_count": 1,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# download dataset from the UCI website\n",
+ "!curl -o uci-labelled-sentences.zip https://archive.ics.uci.edu/static/public/331/sentiment+labelled+sentences.zip\n",
+ "\n",
+ "# unzip dataset in Colab\n",
+ "!unzip uci-labelled-sentences.zip"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ObzFyA7diey3",
+ "outputId": "283c3183-a803-4731-eebf-d0b7e9ba817f"
+ },
+ "execution_count": 2,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ " % Total % Received % Xferd Average Speed Time Time Time Current\n",
+ " Dload Upload Total Spent Left Speed\n",
+ "\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r 0 0 0 0 0 0 0 0 --:--:-- --:--:-- --:--:-- 0\r100 84188 0 84188 0 0 646k 0 --:--:-- --:--:-- --:--:-- 647k\n",
+ "Archive: uci-labelled-sentences.zip\n",
+ " creating: sentiment labelled sentences/\n",
+ " inflating: sentiment labelled sentences/.DS_Store \n",
+ " creating: __MACOSX/\n",
+ " creating: __MACOSX/sentiment labelled sentences/\n",
+ " inflating: __MACOSX/sentiment labelled sentences/._.DS_Store \n",
+ " inflating: sentiment labelled sentences/amazon_cells_labelled.txt \n",
+ " inflating: sentiment labelled sentences/imdb_labelled.txt \n",
+ " inflating: __MACOSX/sentiment labelled sentences/._imdb_labelled.txt \n",
+ " inflating: sentiment labelled sentences/readme.txt \n",
+ " inflating: __MACOSX/sentiment labelled sentences/._readme.txt \n",
+ " inflating: sentiment labelled sentences/yelp_labelled.txt \n",
+ " inflating: __MACOSX/._sentiment labelled sentences \n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "df_list = []\n",
+ "\n",
+ "# Yelp\n",
+ "df_yelp = pd.read_csv('sentiment labelled sentences/yelp_labelled.txt', names=['sentence', 'label'], sep='\\t')\n",
+ "df_yelp['source'] = 'yelp'\n",
+ "df_list.append(df_yelp)\n",
+ "\n",
+ "# Amazon\n",
+ "df_amazon = pd.read_csv('sentiment labelled sentences/amazon_cells_labelled.txt', names=['sentence', 'label'], sep='\\t')\n",
+ "df_amazon['source'] = 'amazon'\n",
+ "df_list.append(df_amazon)\n",
+ "\n",
+ "# IMDB\n",
+ "df_imdb = pd.read_csv('sentiment labelled sentences/imdb_labelled.txt', names=['sentence', 'label'], sep='\\t')\n",
+ "df_imdb['source'] = 'imdb'\n",
+ "df_list.append(df_imdb)\n",
+ "\n",
+ "# Concatenate the dataframes\n",
+ "df = pd.concat(df_list)\n",
+ "\n",
+ "df.head()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 206
+ },
+ "id": "ym148xwIild2",
+ "outputId": "271905f5-4fa5-4f25-c7d6-6ea16746bc6a"
+ },
+ "execution_count": 3,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ " sentence label source\n",
+ "0 Wow... Loved this place. 1 yelp\n",
+ "1 Crust is not good. 0 yelp\n",
+ "2 Not tasty and the texture was just nasty. 0 yelp\n",
+ "3 Stopped by during the late May bank holiday of... 1 yelp\n",
+ "4 The selection on the menu was great and so wer... 1 yelp"
+ ],
+ "text/html": [
+ "\n",
+ "
\n",
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sentence | \n",
+ " label | \n",
+ " source | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " Wow... Loved this place. | \n",
+ " 1 | \n",
+ " yelp | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " Crust is not good. | \n",
+ " 0 | \n",
+ " yelp | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " Not tasty and the texture was just nasty. | \n",
+ " 0 | \n",
+ " yelp | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " Stopped by during the late May bank holiday of... | \n",
+ " 1 | \n",
+ " yelp | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " The selection on the menu was great and so wer... | \n",
+ " 1 | \n",
+ " yelp | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "dataframe",
+ "variable_name": "df",
+ "summary": "{\n \"name\": \"df\",\n \"rows\": 2748,\n \"fields\": [\n {\n \"column\": \"sentence\",\n \"properties\": {\n \"dtype\": \"string\",\n \"num_unique_values\": 2731,\n \"samples\": [\n \"Tied to charger for conversations lasting more than 45 minutes.MAJOR PROBLEMS!!\",\n \"It was too predictable, even for a chick flick. \",\n \"The bose noise cancelling is amazing, which is very important for a NYC commuter.\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"label\",\n \"properties\": {\n \"dtype\": \"number\",\n \"std\": 0,\n \"min\": 0,\n \"max\": 1,\n \"num_unique_values\": 2,\n \"samples\": [\n 0,\n 1\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n },\n {\n \"column\": \"source\",\n \"properties\": {\n \"dtype\": \"category\",\n \"num_unique_values\": 3,\n \"samples\": [\n \"yelp\",\n \"amazon\"\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
+ }
+ },
+ "metadata": {},
+ "execution_count": 3
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "max_features = 2000\n",
+ "tokenizer = Tokenizer(num_words=max_features, split=' ')\n",
+ "tokenizer.fit_on_texts(df['sentence'].values)\n",
+ "X = tokenizer.texts_to_sequences(df['sentence'].values)\n",
+ "X = pad_sequences(X)\n",
+ "y = df['label'].values"
+ ],
+ "metadata": {
+ "id": "F_qjw9IVi2Bs"
+ },
+ "execution_count": 4,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.12)\n"
+ ],
+ "metadata": {
+ "id": "q60NFtcri5BJ"
+ },
+ "execution_count": 5,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "def create_model():\n",
+ " model = Sequential()\n",
+ " model.add(Embedding(max_features, 64, input_length=X.shape[1]))\n",
+ " model.add(LSTM(16))\n",
+ " model.add(Dense(1, activation='sigmoid'))\n",
+ " model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])\n",
+ " return model\n",
+ "\n",
+ "model = create_model()"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "V2GCbbHSi8K_",
+ "outputId": "522a67d2-288b-471d-aef6-fcde5c882d32"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "/usr/local/lib/python3.10/dist-packages/keras/src/layers/core/embedding.py:90: UserWarning: Argument `input_length` is deprecated. Just remove it.\n",
+ " warnings.warn(\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "model.fit(X_train, y_train, epochs=6, batch_size=16, validation_data=(X_test, y_test), callbacks = [EarlyStopping(monitor='val_accuracy', min_delta=0.001, patience=2, verbose=1)])"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "PePtblNTi-9l",
+ "outputId": "5782e609-6492-4709-d160-288eb1ae3c60"
+ },
+ "execution_count": 7,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "Epoch 1/6\n",
+ "\u001b[1m152/152\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m77s\u001b[0m 488ms/step - accuracy: 0.5971 - loss: 0.6729 - val_accuracy: 0.7606 - val_loss: 0.5231\n",
+ "Epoch 2/6\n",
+ "\u001b[1m152/152\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m75s\u001b[0m 491ms/step - accuracy: 0.8629 - loss: 0.3859 - val_accuracy: 0.8061 - val_loss: 0.4332\n",
+ "Epoch 3/6\n",
+ "\u001b[1m152/152\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m79s\u001b[0m 470ms/step - accuracy: 0.9335 - loss: 0.2057 - val_accuracy: 0.8121 - val_loss: 0.4459\n",
+ "Epoch 4/6\n",
+ "\u001b[1m152/152\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m72s\u001b[0m 472ms/step - accuracy: 0.9587 - loss: 0.1486 - val_accuracy: 0.8333 - val_loss: 0.4730\n",
+ "Epoch 5/6\n",
+ "\u001b[1m152/152\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 472ms/step - accuracy: 0.9692 - loss: 0.1114 - val_accuracy: 0.8121 - val_loss: 0.5696\n",
+ "Epoch 6/6\n",
+ "\u001b[1m152/152\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m82s\u001b[0m 472ms/step - accuracy: 0.9772 - loss: 0.0743 - val_accuracy: 0.8333 - val_loss: 0.5894\n",
+ "Epoch 6: early stopping\n"
+ ]
+ },
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "execution_count": 7
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "model.save(\"uci_sentimentanalysis.h5\")\n",
+ "\n",
+ "with open('tokenizer.pickle', 'wb') as handle:\n",
+ " pickle.dump(tokenizer, handle, protocol=pickle.DEFAULT_PROTOCOL)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "VaMeW7CYjBPj",
+ "outputId": "013e87ee-8f9c-4fb3-bcab-93679a999334"
+ },
+ "execution_count": 8,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_model(model)`. This file format is considered legacy. We recommend using instead the native Keras format, e.g. `model.save('my_model.keras')` or `keras.saving.save_model(model, 'my_model.keras')`. \n"
+ ]
+ }
+ ]
+ }
+ ]
+}
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index a55b9cf..04b3e6d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,11 +1,151 @@
-Flask==2.2.5
-Flask-SQLAlchemy==2.5.1
-numpy==1.23.5
-keras==2.13.1
-sqlalchemy==1.4.20
-psycopg2-binary==2.9.1
-pytest==7.4.2
-scikit-learn==1.2.2
-tensorflow==2.13.0
+absl-py==2.1.0
+anyio==4.6.0
+argon2-cffi==23.1.0
+argon2-cffi-bindings==21.2.0
+arrow==1.3.0
+asttokens==2.4.1
+astunparse==1.6.3
+async-lru==2.0.4
+attrs==24.2.0
+babel==2.16.0
+beautifulsoup4==4.12.3
+bleach==6.1.0
+blinker==1.8.2
+certifi==2024.8.30
+cffi==1.17.1
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+comm==0.2.2
+contourpy==1.3.0
+cycler==0.12.1
+debugpy==1.8.6
+decorator==5.1.1
+defusedxml==0.7.1
+executing==2.1.0
+fastjsonschema==2.20.0
+filelock==3.13.1
+Flask==3.0.3
+flatbuffers==24.3.25
+fonttools==4.54.1
+fqdn==1.5.1
+fsspec==2024.2.0
+gast==0.6.0
+gitdb==4.0.11
+GitPython==3.1.43
+google-pasta==0.2.0
+grpcio==1.67.1
+h11==0.14.0
+h5py==3.12.1
+httpcore==1.0.5
+httpx==0.27.2
+idna==3.10
+ipykernel==6.29.5
+ipython==8.27.0
+isoduration==20.11.0
+itsdangerous==2.2.0
+jedi==0.19.1
+Jinja2==3.1.4
+joblib==1.4.2
+json5==0.9.25
+jsonpointer==3.0.0
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+jupyter-events==0.10.0
+jupyter-lsp==2.2.5
+jupyter-server-mathjax==0.2.6
+jupyter_client==8.6.3
+jupyter_core==5.7.2
+jupyter_server==2.14.2
+jupyter_server_terminals==0.5.3
+jupyterlab==4.2.5
+jupyterlab_git==0.50.1
+jupyterlab_pygments==0.3.0
+jupyterlab_server==2.27.3
+keras==3.6.0
+kiwisolver==1.4.7
+libclang==18.1.1
+Markdown==3.7
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.9.2
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mistune==3.0.2
+ml-dtypes==0.4.1
+mpmath==1.3.0
+namex==0.0.8
+nbclient==0.10.0
+nbconvert==7.16.4
+nbdime==4.0.2
+nbformat==5.10.4
+nest-asyncio==1.6.0
+networkx==3.2.1
+notebook_shim==0.2.4
+numpy==2.0.2
+opt_einsum==3.4.0
+optree==0.13.0
+overrides==7.7.0
+packaging==24.1
+pandas==2.2.3
+pandocfilters==1.5.1
+parso==0.8.4
+pexpect==4.9.0
+pillow==10.4.0
+platformdirs==4.3.6
+plotly==5.24.1
+prometheus_client==0.21.0
+prompt_toolkit==3.0.48
+protobuf==5.28.3
+psutil==6.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pycparser==2.22
+Pygments==2.18.0
+pyparsing==3.1.4
+python-dateutil==2.9.0.post0
+python-json-logger==2.0.7
+pytz==2024.2
+PyYAML==6.0.2
+pyzmq==26.2.0
+referencing==0.35.1
+requests==2.32.3
+rfc3339-validator==0.1.4
+rfc3986-validator==0.1.1
+rich==13.9.4
+rpds-py==0.20.0
+scikit-learn==1.5.2
+scipy==1.14.1
+seaborn==0.13.2
+Send2Trash==1.8.3
+setuptools==75.1.0
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+soupsieve==2.6
+stack-data==0.6.3
+sympy==1.12
+tenacity==9.0.0
+tensorboard==2.18.0
+tensorboard-data-server==0.7.2
+tensorflow==2.18.0
+termcolor==2.5.0
+terminado==0.18.1
+threadpoolctl==3.5.0
+tinycss2==1.3.0
+torch==2.4.1
+tornado==6.4.1
+traitlets==5.14.3
+types-python-dateutil==2.9.0.20240906
+typing_extensions==4.9.0
+tzdata==2024.2
+uri-template==1.3.0
+urllib3==2.2.3
vaderSentiment==3.3.2
-gunicorn==19.3.0
\ No newline at end of file
+wcwidth==0.2.13
+webcolors==24.8.0
+webencodings==0.5.1
+websocket-client==1.8.0
+Werkzeug==3.1.2
+wheel==0.45.0
+wrapt==1.16.0
diff --git a/templates/form.html b/templates/form.html
index f1890b6..34ce15b 100644
--- a/templates/form.html
+++ b/templates/form.html
@@ -1,12 +1,38 @@
+
+
+
+
+
Sentiment Analysis
-
+
+
SentimentScope Analyzer
+
+
+
+ {% if sentiment %}
+
Sentiment Analysis Results:
+
Positive: {{ sentiment['pos'] * 100 }}%
+
Neutral: {{ sentiment['neu'] * 100 }}%
+
Negative: {{ sentiment['neg'] * 100 }}%
+
Compound: {{ sentiment['compound'] * 100 }}%
+
Custom Keras model: {{ sentiment['custom model positive'] }}
+ {% endif %}
+
+
+
+