diff --git a/.gitignore b/.gitignore
index 20fb27c..50c5c77 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ __pycache__/
 htdocs/
 .idea/
 .venv/
+data/
diff --git a/lidiabrowser/lidia/admin.py b/lidiabrowser/lidia/admin.py
index fab8f6c..186db44 100644
--- a/lidiabrowser/lidia/admin.py
+++ b/lidiabrowser/lidia/admin.py
@@ -1,6 +1,7 @@
 from typing import List, Type
 from django.contrib import admin
 from django.http import HttpRequest
+from django.utils.html import format_html_join, format_html
 
 from .models import (
     Annotation,
@@ -143,10 +144,24 @@ class PublicationAdmin(admin.ModelAdmin):
 
 
 class LidiaTermAdmin(admin.ModelAdmin):
-    list_display = ["term", "vocab"]
+    list_display = ["term", "vocab", "formatted_urls"]
     list_filter = ["vocab"]
+    fields = ["term", "vocab", "formatted_urls"]
+    # A ModelAdmin method may only appear in `fields` when it is also read-only.
+    readonly_fields = ["formatted_urls"]
     change_form_template = "lidia/change_form_lidiaterm.html"
 
+    @admin.display(description="URLs")
+    def formatted_urls(self, obj):
+        """Render the term's stored URLs as comma-separated "vocab: link" pairs."""
+        if not obj.urls:
+            return ''  # Return an empty string if there are no URLs
+        links = (
+            (item['vocab'], format_html('<a href="{}">{}</a>', item['url'], item['term']))
+            for item in obj.urls
+        )
+        return format_html_join(', ', '{}: {}', links)
+
 
 class ArticleTermAdmin(admin.ModelAdmin):
     change_form_template = "lidia/change_form_articleterm.html"
diff --git a/lidiabrowser/lidia/migrations/0003_alter_lidiaterm_vocab.py b/lidiabrowser/lidia/migrations/0003_alter_lidiaterm_vocab.py
new file mode 100644
index 0000000..94f66c0
--- /dev/null
+++ b/lidiabrowser/lidia/migrations/0003_alter_lidiaterm_vocab.py
@@ -0,0 +1,18 @@
+# Generated by Django 4.2.7 on 2024-03-22 15:54
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('lidia', '0002_alter_category_options_remove_publication_zotero_id_and_more'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='lidiaterm',
+            name='vocab',
+            field=models.CharField(choices=[('lidia', 'LIDIA'), ('custom', 'custom')], max_length=6,
verbose_name='vocabulary'), + ), + ] diff --git a/lidiabrowser/lidia/migrations/0004_lidiaterm_urls.py b/lidiabrowser/lidia/migrations/0004_lidiaterm_urls.py new file mode 100644 index 0000000..6b67f56 --- /dev/null +++ b/lidiabrowser/lidia/migrations/0004_lidiaterm_urls.py @@ -0,0 +1,18 @@ +# Generated by Django 4.2.7 on 2024-03-22 16:54 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('lidia', '0003_alter_lidiaterm_vocab'), + ] + + operations = [ + migrations.AddField( + model_name='lidiaterm', + name='urls', + field=models.JSONField(blank=True, null=True, verbose_name='URLs'), + ), + ] diff --git a/lidiabrowser/lidia/models.py b/lidiabrowser/lidia/models.py index 1b7c1a7..f977cc9 100644 --- a/lidiabrowser/lidia/models.py +++ b/lidiabrowser/lidia/models.py @@ -145,12 +145,13 @@ def __str__(self): class LidiaTerm(models.Model): VOCAB_CHOICES = [ - ('lol', 'Lexicon of Linguistics'), + ('lidia', 'LIDIA'), ('custom', 'custom'), ] vocab = models.CharField("vocabulary", max_length=6, choices=VOCAB_CHOICES) term = models.CharField("LIDIA term", max_length=100) + urls = models.JSONField("URLs", null=True, blank=True) class Meta: unique_together = [['vocab', 'term']] diff --git a/lidiabrowser/lidiabrowser/settings.py b/lidiabrowser/lidiabrowser/settings.py index 964b554..1d3ca8f 100644 --- a/lidiabrowser/lidiabrowser/settings.py +++ b/lidiabrowser/lidiabrowser/settings.py @@ -180,3 +180,6 @@ ZOTERO_LIBRARY_ID = env.str("ZOTERO_LIBRARY_ID") ZOTERO_LIBRARY_TYPE = env.str("ZOTERO_LIBRARY_TYPE") ZOTERO_API_KEY = env.str("ZOTERO_API_KEY") +# LEXICON locations can be set in .env to override defaults +LEXICON_URL = env.str('LEXICON_URL', "https://github.com/CentreForDigitalHumanities/lidia-zotero/raw/main/vocabulary/lexicon.xlsx") +LEXICON_FILEPATH = env.str('LEXICON_FILEPATH', BASE_DIR / "data" / "lexicon.xlsx") diff --git a/lidiabrowser/sync/populate.py b/lidiabrowser/sync/populate.py index 19e3f1c..f5e5108 100644 
--- a/lidiabrowser/sync/populate.py
+++ b/lidiabrowser/sync/populate.py
@@ -1,11 +1,15 @@
-from typing import Optional
-from django.db import transaction
-
-import yaml
 import logging
+import os
+import shutil
+import urllib.request
+import yaml
+
+import openpyxl
+from django.conf import settings
+from django.db import transaction
+from typing import Optional
 
 import sync.models as syncmodels
-from sync.zoteroutils import get_attachment_url, get_attachment_id_from_url
 from lidia.models import (
     Annotation,
     ArticleTerm,
@@ -17,11 +21,69 @@
     Publication,
     TermGroup,
 )
+from sync.zoteroutils import get_attachment_url, get_attachment_id_from_url
+
 
 
 logger = logging.getLogger(__name__)
 
 LIDIAPREFIX = "~~~~LIDIA~~~~"
+LEXICON_URLS = {}
+
+
+def fetch_lexicon_data():
+    """Download the lexicon spreadsheet unless a non-empty copy exists.
+
+    The source is settings.LEXICON_URL and the target is
+    settings.LEXICON_FILEPATH. Download errors are logged, not raised.
+    """
+    url = settings.LEXICON_URL
+    filename = settings.LEXICON_FILEPATH
+
+    if os.path.isfile(filename) and os.path.getsize(filename) > 0:
+        logger.info("Lexicon spreadsheet already downloaded.")
+        return
+
+    directory = os.path.dirname(filename)
+    if directory:  # A bare file name has no directory to create
+        os.makedirs(directory, exist_ok=True)
+
+    # Download to a sibling file and rename it afterwards, so an interrupted
+    # transfer never leaves a truncated file that passes the size check above.
+    partial = f"{filename}.part"
+    try:
+        with urllib.request.urlopen(url) as response, \
+                open(partial, 'wb') as out_file:
+            shutil.copyfileobj(response, out_file)
+        os.replace(partial, filename)
+        logger.info("Lexicon spreadsheet downloaded successfully.")
+    except Exception as e:
+        logger.error(f"Error downloading lexicon spreadsheet: {e}")
+        if os.path.exists(partial):
+            os.remove(partial)
+
+
+def load_lexicon_data():
+    """Fill LEXICON_URLS with the external links listed for each slug.
+
+    Reads the 'entries' sheet of the workbook at settings.LEXICON_FILEPATH;
+    the first row of that sheet supplies the column names.
+    """
+    workbook = openpyxl.load_workbook(settings.LEXICON_FILEPATH)
+    sheet = workbook['entries']
+    # Map every column name in the header row to its position
+    headers = {cell.value: i for i, cell in enumerate(sheet[1])}
+    LEXICON_URLS.clear()  # Drop entries left over from an earlier load
+    for row in sheet.iter_rows(min_row=2, values_only=True):  # Skip header
+        slug = row[headers['slug']]
+        if not slug:
+            continue  # Rows without a slug cannot be looked up
+        # Store terms and urls if they exist
+        LEXICON_URLS[slug] = [
+            {'vocab': vocab, 'term': row[headers[vocab]], 'url': row[headers[f'{vocab}-url']]}
+            for vocab in ('ull', 'ccr')
+            if row[headers[vocab]]
+        ]
 
 
 def
process_continuation_annotations() -> None: @@ -60,6 +102,7 @@ def process_continuation_annotations() -> None: def create_lidiaterm(lexiconterm: str, customterm: str) -> Optional[LidiaTerm]: + urls_data = None if not lexiconterm: return None if lexiconterm == 'custom': @@ -68,12 +111,16 @@ def create_lidiaterm(lexiconterm: str, customterm: str) -> Optional[LidiaTerm]: vocab = 'custom' term = customterm else: - vocab = 'lol' + vocab = 'lidia' term = lexiconterm + if term in LEXICON_URLS: + urls_data = LEXICON_URLS[term] lidiaterm, _ = LidiaTerm.objects.get_or_create( vocab=vocab, - term=term + term=term, + defaults={'urls': urls_data} ) + return lidiaterm @@ -115,6 +162,9 @@ def create_term_group(annotation: Annotation, index: int, data: dict) -> TermGro def populate(): + fetch_lexicon_data() + load_lexicon_data() # Load LEXICON_URLS global + for pub in syncmodels.Publication.objects.iterator(): with transaction.atomic(): zotero_id = pub.zotero_id diff --git a/requirements.in b/requirements.in index 76ebc5b..d40099e 100644 --- a/requirements.in +++ b/requirements.in @@ -4,3 +4,4 @@ pyzotero Django django-environ iso639-lang>=2.1.0 +openpyxl diff --git a/requirements.txt b/requirements.txt index 8bee7e3..f151a7e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -16,12 +16,16 @@ django==4.2.7 # via -r requirements.in django-environ==0.11.2 # via -r requirements.in +et-xmlfile==1.1.0 + # via openpyxl feedparser==6.0.10 # via pyzotero idna==3.6 # via requests iso639-lang==2.2.1 # via -r requirements.in +openpyxl==3.1.2 + # via -r requirements.in pyparsing==3.1.1 # via bibtexparser python-dotenv==1.0.0