From fa1fb795ec49e08c105efbee5703b88c9bd771d4 Mon Sep 17 00:00:00 2001 From: Christian A Date: Tue, 25 Jun 2024 18:29:16 +0000 Subject: [PATCH] Fixed --- .../Advanced_Retrieval_System.ipynb | 92 ++++++++++--------- 1 file changed, 51 insertions(+), 41 deletions(-) diff --git a/Group-8-Retrieval-System/Advanced_Retrieval_System.ipynb b/Group-8-Retrieval-System/Advanced_Retrieval_System.ipynb index 374ed37..9f9075c 100644 --- a/Group-8-Retrieval-System/Advanced_Retrieval_System.ipynb +++ b/Group-8-Retrieval-System/Advanced_Retrieval_System.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -20,47 +20,47 @@ "Requirement already satisfied: ir-datasets in /usr/local/lib/python3.10/dist-packages (0.5.5)\n", "Requirement already satisfied: python-terrier in /usr/local/lib/python3.10/dist-packages (0.10.0)\n", "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (3.8.1)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from tira) (4.66.1)\n", "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from tira) (2.1.3)\n", "Requirement already satisfied: requests==2.*,>=2.26 in /usr/local/lib/python3.10/dist-packages (from tira) (2.31.0)\n", - "Requirement already satisfied: tqdm in /usr/local/lib/python3.10/dist-packages (from tira) (4.66.1)\n", "Requirement already satisfied: docker==6.*,>=6.0.0 in /usr/local/lib/python3.10/dist-packages (from tira) (6.1.3)\n", "Requirement already satisfied: packaging>=14.0 in /usr/local/lib/python3.10/dist-packages (from docker==6.*,>=6.0.0->tira) (23.2)\n", - "Requirement already satisfied: websocket-client>=0.32.0 in /usr/local/lib/python3.10/dist-packages (from docker==6.*,>=6.0.0->tira) (1.7.0)\n", "Requirement already satisfied: urllib3>=1.26.0 in /usr/local/lib/python3.10/dist-packages (from docker==6.*,>=6.0.0->tira) (2.1.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (3.6)\n", + "Requirement already satisfied: websocket-client>=0.32.0 in /usr/local/lib/python3.10/dist-packages (from docker==6.*,>=6.0.0->tira) (1.7.0)\n", "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (3.3.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (3.6)\n", "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests==2.*,>=2.26->tira) (2023.11.17)\n", + "Requirement already satisfied: inscriptis>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (2.3.2)\n", + "Requirement already satisfied: ijson>=3.1.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (3.2.3)\n", "Requirement already satisfied: pyyaml>=5.3.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (6.0.1)\n", - "Requirement already satisfied: lxml>=4.5.2 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.9.3)\n", + "Requirement already satisfied: zlib-state>=0.1.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.1.6)\n", "Requirement already satisfied: beautifulsoup4>=4.4.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.12.2)\n", "Requirement already satisfied: lz4>=3.1.10 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.3.2)\n", - "Requirement already satisfied: numpy>=1.18.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (1.26.2)\n", - "Requirement already satisfied: trec-car-tools>=2.5.4 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (2.6)\n", "Requirement already satisfied: pyautocorpus>=0.1.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.1.12)\n", - "Requirement already satisfied: warc3-wet-clueweb09>=0.2.5 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.5)\n", + "Requirement already satisfied: numpy>=1.18.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (1.26.2)\n", "Requirement already satisfied: unlzw3>=0.2.1 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.2)\n", - "Requirement already satisfied: ijson>=3.1.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (3.2.3)\n", + "Requirement already satisfied: trec-car-tools>=2.5.4 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (2.6)\n", + "Requirement already satisfied: lxml>=4.5.2 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (4.9.3)\n", "Requirement already satisfied: warc3-wet>=0.2.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.3)\n", - "Requirement already satisfied: zlib-state>=0.1.3 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.1.6)\n", - "Requirement already satisfied: inscriptis>=2.2.0 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (2.3.2)\n", + "Requirement already satisfied: warc3-wet-clueweb09>=0.2.5 in /usr/local/lib/python3.10/dist-packages (from ir-datasets) (0.2.5)\n", + "Requirement already satisfied: dill in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.3.7)\n", + "Requirement already satisfied: ir-measures>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.3.3)\n", + "Requirement already satisfied: chest in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.2.3)\n", + "Requirement already satisfied: pyjnius>=1.4.2 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.6.1)\n", "Requirement already satisfied: statsmodels in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.14.0)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (3.1.2)\n", + "Requirement already satisfied: matchpy in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.5.5)\n", + "Requirement already satisfied: nptyping==1.4.4 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.4.4)\n", "Requirement already satisfied: more-itertools in /usr/local/lib/python3.10/dist-packages (from python-terrier) (10.1.0)\n", "Requirement already satisfied: deprecated in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.2.14)\n", - "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.3.2)\n", + "Requirement already satisfied: wget in /usr/local/lib/python3.10/dist-packages (from python-terrier) (3.2)\n", "Requirement already satisfied: pytrec-eval-terrier>=0.5.3 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.5.6)\n", - "Requirement already satisfied: nptyping==1.4.4 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.4.4)\n", - "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (3.1.2)\n", - "Requirement already satisfied: chest in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.2.3)\n", "Requirement already satisfied: scipy in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.11.4)\n", - "Requirement already satisfied: ir-measures>=0.3.1 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.3.3)\n", - "Requirement already satisfied: matchpy in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.5.5)\n", - "Requirement already satisfied: pyjnius>=1.4.2 in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.6.1)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.3.2)\n", "Requirement already satisfied: joblib in /usr/local/lib/python3.10/dist-packages (from python-terrier) (1.3.2)\n", - "Requirement already satisfied: wget in /usr/local/lib/python3.10/dist-packages (from python-terrier) (3.2)\n", - "Requirement already satisfied: dill in /usr/local/lib/python3.10/dist-packages (from python-terrier) (0.3.7)\n", "Requirement already satisfied: typish>=1.7.0 in /usr/local/lib/python3.10/dist-packages (from nptyping==1.4.4->python-terrier) (1.9.3)\n", - "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk) (8.1.7)\n", "Requirement already satisfied: regex>=2021.8.3 in /usr/local/lib/python3.10/dist-packages (from nltk) (2024.5.15)\n", + "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk) (8.1.7)\n", "Requirement already satisfied: soupsieve>1.2 in /usr/local/lib/python3.10/dist-packages (from beautifulsoup4>=4.4.1->ir-datasets) (2.5)\n", "Requirement already satisfied: cwl-eval>=1.0.10 in /usr/local/lib/python3.10/dist-packages (from ir-measures>=0.3.1->python-terrier) (1.0.12)\n", "Requirement already satisfied: cbor>=1.0.0 in /usr/local/lib/python3.10/dist-packages (from trec-car-tools>=2.5.4->ir-datasets) (1.0.0)\n", @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -104,9 +104,19 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8\n", + "\n", + "No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.\n" + ] + } + ], "source": [ "# Create a REST client to the TIRA platform for retrieving the pre-indexed data.\n", "ensure_pyterrier_is_loaded()\n", @@ -115,7 +125,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -126,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -147,7 +157,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -158,7 +168,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -168,7 +178,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -186,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -196,7 +206,7 @@ }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -213,12 +223,12 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "# # Evaluate BM25\n", - "bm25_results = bm25.transform(topics)\n", + "bm25_results = bm25.transform(dataset.get_topics('text'))\n", "# bm25_metrics = pt.Experiment(\n", "# [bm25],\n", "# dataset.get_topics(), \n", @@ -229,12 +239,12 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# # Evaluate TF-IDF\n", - "tfidf_results = tfidf.transform(topics)\n", + "tfidf_results = tfidf.transform(dataset.get_topics('text'))\n", "# tfidf_metrics = pt.Experiment(\n", "# [tfidf],\n", "# topics,\n", @@ -252,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -276,7 +286,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -301,7 +311,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -318,7 +328,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -336,7 +346,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -353,7 +363,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": null, "metadata": {}, "outputs": [], "source": [