Skip to content

Commit

Permalink
Fix for Pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
Christian A committed Jun 22, 2024
1 parent 30ceb2e commit 7ea8d4b
Showing 1 changed file with 26 additions and 194 deletions.
220 changes: 26 additions & 194 deletions Group-8-Retrieval-System/Porter2Stemmer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"source": [
"# You only need to execute this cell if you are using Google Colab.\n",
"# If you use GitHub Codespaces, everything is already installed.\n",
"!pip3 install tira ir-datasets python-terrier"
"!pip3 install tira ir-datasets python-terrier nltk"
]
},
{
Expand All @@ -23,7 +23,7 @@
"\n",
"import nltk\n",
"from nltk.stem import PorterStemmer\n",
"nltk.download('punkt')\n",
"!nltk.download('punkt')\n",
"\n",
"import pyterrier as pt\n",
"import pandas as pd"
Expand Down Expand Up @@ -76,15 +76,6 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stemmed_topics"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"# Define the retrieval pipeline with BM25\n",
"bm25 = pt.BatchRetrieve(index, wmodel=\"BM25\")\n",
Expand All @@ -95,7 +86,7 @@
},
{
"cell_type": "code",
"execution_count": 84,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -104,203 +95,44 @@
},
{
"cell_type": "code",
"execution_count": 85,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The run file is normalized outside the TIRA sandbox, I will store it at \"../runs\".\n",
"Done. run file is stored under \"../runs/run.txt\".\n"
]
}
],
"outputs": [],
"source": [
"persist_and_normalize_run(run, system_name='bm25-baseline', default_output='../runs')"
]
},
{
"cell_type": "code",
"execution_count": 86,
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"There are multiple query fields available: ('text', 'title', 'query', 'description', 'narrative'). To use with pyterrier, provide variant or modify dataframe to add query column.\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>map</th>\n",
" <th>recip_rank</th>\n",
" <th>P_1000</th>\n",
" <th>map +</th>\n",
" <th>map -</th>\n",
" <th>map p-value</th>\n",
" <th>recip_rank +</th>\n",
" <th>recip_rank -</th>\n",
" <th>recip_rank p-value</th>\n",
" <th>P_1000 +</th>\n",
" <th>P_1000 -</th>\n",
" <th>P_1000 p-value</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>BM25</td>\n",
" <td>0.262311</td>\n",
" <td>0.579877</td>\n",
" <td>0.016191</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name map recip_rank P_1000 map + map - map p-value recip_rank + \\\n",
"0 BM25 0.262311 0.579877 0.016191 None None None None \n",
"\n",
" recip_rank - recip_rank p-value P_1000 + P_1000 - P_1000 p-value \n",
"0 None None None None None "
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"#Localtest\n",
"pt.Experiment(\n",
" [bm25], \n",
" pt_dataset.get_topics(), \n",
" pt_dataset.get_qrels(), \n",
" eval_metrics=['P_1000', 'map', 'recip_rank'],\n",
" names=['BM25'],\n",
" baseline=0\n",
" )"
"# #Localtest\n",
"# pt.Experiment(\n",
"# [bm25], \n",
"# pt_dataset.get_topics(), \n",
"# pt_dataset.get_qrels(), \n",
"# eval_metrics=['P_1000', 'map', 'recip_rank'],\n",
"# names=['BM25'],\n",
"# baseline=0\n",
"# )"
]
},
{
"cell_type": "code",
"execution_count": 87,
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>map</th>\n",
" <th>recip_rank</th>\n",
" <th>P_1000</th>\n",
" <th>map +</th>\n",
" <th>map -</th>\n",
" <th>map p-value</th>\n",
" <th>recip_rank +</th>\n",
" <th>recip_rank -</th>\n",
" <th>recip_rank p-value</th>\n",
" <th>P_1000 +</th>\n",
" <th>P_1000 -</th>\n",
" <th>P_1000 p-value</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>BM25 + Porter2 Stemmer</td>\n",
" <td>0.243377</td>\n",
" <td>0.498491</td>\n",
" <td>0.015456</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name map recip_rank P_1000 map + map - \\\n",
"0 BM25 + Porter2 Stemmer 0.243377 0.498491 0.015456 None None \n",
"\n",
" map p-value recip_rank + recip_rank - recip_rank p-value P_1000 + P_1000 - \\\n",
"0 None None None None None None \n",
"\n",
" P_1000 p-value \n",
"0 None "
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"outputs": [],
"source": [
"pt.Experiment(\n",
" [bm25], \n",
" df_stemmed_topics, \n",
" pt_dataset.get_qrels(), \n",
" eval_metrics=['P_1000', 'map', 'recip_rank'],\n",
" names=['BM25 + Porter2 Stemmer'],\n",
" baseline=0\n",
")"
"# pt.Experiment(\n",
"# [bm25], \n",
"# df_stemmed_topics, \n",
"# pt_dataset.get_qrels(), \n",
"# eval_metrics=['P_1000', 'map', 'recip_rank'],\n",
"# names=['BM25 + Porter2 Stemmer'],\n",
"# baseline=0\n",
"# )"
]
}
],
Expand Down

0 comments on commit 7ea8d4b

Please sign in to comment.