Skip to content

Commit

Permalink
Merge pull request #25 from CentreForDigitalHumanities/feature/term-urls
Browse files Browse the repository at this point in the history
Add vocabulary URLs to LIDIA terms
  • Loading branch information
ar-jan authored Mar 28, 2024
2 parents 94cdbf8 + 842afb8 commit 05ecdaa
Show file tree
Hide file tree
Showing 9 changed files with 117 additions and 9 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ __pycache__/
htdocs/
.idea/
.venv/
data/
14 changes: 13 additions & 1 deletion lidiabrowser/lidia/admin.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from typing import List, Type
from django.contrib import admin
from django.http import HttpRequest
from django.utils.html import format_html_join, format_html

from .models import (
Annotation,
Expand Down Expand Up @@ -143,10 +144,21 @@ class PublicationAdmin(admin.ModelAdmin):


class LidiaTermAdmin(admin.ModelAdmin):
    """Admin configuration for LIDIA terms and their external vocabulary URLs."""

    list_display = ["term", "vocab", "formatted_urls"]
    list_filter = ["vocab"]
    fields = ["term", "vocab", "formatted_urls"]
    # ``formatted_urls`` is an admin method, not a model field. Django only
    # accepts such names in ``fields`` when they are declared read-only;
    # otherwise building the change form fails with an unknown-field error.
    readonly_fields = ["formatted_urls"]
    change_form_template = "lidia/change_form_lidiaterm.html"

    @admin.display(description="URLs")
    def formatted_urls(self, obj):
        """Render ``obj.urls`` as a comma-separated list of labelled links.

        Each entry of ``obj.urls`` is expected to be a dict with the keys
        ``'vocab'``, ``'term'`` and ``'url'``. Entries are rendered as
        ``<vocab>: <a href="<url>"><term></a>``; an entry without a URL is
        rendered as plain (escaped) text instead of a link to ``"None"``.

        Returns an empty string when the term has no URLs.
        """
        if not obj.urls:
            return ''  # Return an empty string if there are no URLs

        links = []
        for item in obj.urls:
            if item.get('url'):
                label = format_html(
                    '<a href="{}">{}</a>', item['url'], item['term']
                )
            else:
                # No target to link to: show the term name only.
                label = format_html('{}', item['term'])
            links.append((item['vocab'], label))
        return format_html_join(', ', '{}: {}', links)


class ArticleTermAdmin(admin.ModelAdmin):
change_form_template = "lidia/change_form_articleterm.html"
Expand Down
18 changes: 18 additions & 0 deletions lidiabrowser/lidia/migrations/0003_alter_lidiaterm_vocab.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.7 on 2024-03-22 15:54

from django.db import migrations, models


class Migration(migrations.Migration):
    """Alter ``LidiaTerm.vocab`` to offer the 'lidia' and 'custom' choices."""

    dependencies = [
        (
            'lidia',
            '0002_alter_category_options_remove_publication_zotero_id_and_more',
        ),
    ]

    operations = [
        # Schema-only change: AlterField redefines the column/choices and
        # does not touch values already stored in existing rows.
        migrations.AlterField(
            model_name='lidiaterm',
            name='vocab',
            field=models.CharField(
                choices=[
                    ('lidia', 'LIDIA'),
                    ('custom', 'custom'),
                ],
                max_length=6,
                verbose_name='vocabulary',
            ),
        ),
    ]
18 changes: 18 additions & 0 deletions lidiabrowser/lidia/migrations/0004_lidiaterm_urls.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 4.2.7 on 2024-03-22 16:54

from django.db import migrations, models


class Migration(migrations.Migration):
    """Add the optional JSON field ``urls`` to ``LidiaTerm``."""

    dependencies = [
        ('lidia', '0003_alter_lidiaterm_vocab'),
    ]

    operations = [
        migrations.AddField(
            model_name='lidiaterm',
            name='urls',
            # Nullable and blank-able, so existing rows need no default.
            field=models.JSONField(
                blank=True,
                null=True,
                verbose_name='URLs',
            ),
        ),
    ]
3 changes: 2 additions & 1 deletion lidiabrowser/lidia/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,12 +145,13 @@ def __str__(self):

class LidiaTerm(models.Model):
VOCAB_CHOICES = [
('lol', 'Lexicon of Linguistics'),
('lidia', 'LIDIA'),
('custom', 'custom'),
]

vocab = models.CharField("vocabulary", max_length=6, choices=VOCAB_CHOICES)
term = models.CharField("LIDIA term", max_length=100)
urls = models.JSONField("URLs", null=True, blank=True)

class Meta:
unique_together = [['vocab', 'term']]
Expand Down
3 changes: 3 additions & 0 deletions lidiabrowser/lidiabrowser/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,3 +180,6 @@
# Zotero connection settings, read from the environment (no defaults given).
ZOTERO_LIBRARY_ID = env.str("ZOTERO_LIBRARY_ID")
ZOTERO_LIBRARY_TYPE = env.str("ZOTERO_LIBRARY_TYPE")
ZOTERO_API_KEY = env.str("ZOTERO_API_KEY")
# LEXICON locations can be set in .env to override defaults
LEXICON_URL = env.str(
    'LEXICON_URL',
    "https://github.com/CentreForDigitalHumanities/lidia-zotero/raw/main/vocabulary/lexicon.xlsx",
)
# The default is converted to a plain string so that the setting has the same
# type whether it comes from the environment or from the fallback below.
LEXICON_FILEPATH = env.str(
    'LEXICON_FILEPATH',
    str(BASE_DIR / "data" / "lexicon.xlsx"),
)
64 changes: 57 additions & 7 deletions lidiabrowser/sync/populate.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,15 @@
from typing import Optional
from django.db import transaction

import yaml
import logging
import os
import shutil
import urllib.request
import yaml

import openpyxl
from django.conf import settings
from django.db import transaction
from typing import Optional

import sync.models as syncmodels
from sync.zoteroutils import get_attachment_url, get_attachment_id_from_url
from lidia.models import (
Annotation,
ArticleTerm,
Expand All @@ -17,11 +21,49 @@
Publication,
TermGroup,
)
from sync.zoteroutils import get_attachment_url, get_attachment_id_from_url


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# NOTE(review): not referenced in the visible part of this module; presumably
# a marker prefix used when parsing LIDIA annotations -- confirm against users.
LIDIAPREFIX = "~~~~LIDIA~~~~"
# In-memory cache of external vocabulary links, keyed by lexicon slug.
# Filled by load_lexicon_data(); each value is a list of dicts with the keys
# 'vocab', 'term' and 'url'. Read by create_lidiaterm().
LEXICON_URLS = {}


def fetch_lexicon_data():
    """Download the lexicon spreadsheet unless a local copy already exists.

    The source is ``settings.LEXICON_URL`` and the destination is
    ``settings.LEXICON_FILEPATH``. A non-empty file at the destination is
    treated as a valid cached copy and nothing is downloaded.

    The download is best-effort: any failure is logged and swallowed, so
    callers must be prepared for the file to be missing afterwards. The data
    is first written to a temporary ``.part`` file and only moved into place
    once complete, so an interrupted download can never be mistaken for a
    valid cached spreadsheet on the next run.
    """
    url = settings.LEXICON_URL
    # The setting may be a str or a path-like object; normalise to str.
    filename = os.fspath(settings.LEXICON_FILEPATH)

    if os.path.isfile(filename) and os.path.getsize(filename) > 0:
        logger.info("Lexicon spreadsheet already downloaded.")
        return

    # A bare file name has no directory part, and os.makedirs('') would raise.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    partial = filename + ".part"
    try:
        with urllib.request.urlopen(url) as response, \
                open(partial, 'wb') as out_file:
            shutil.copyfileobj(response, out_file)
        # Move into place only after the whole body has been written.
        os.replace(partial, filename)
        logger.info("Lexicon spreadsheet downloaded successfully.")
    except Exception as e:
        logger.error(f"Error downloading lexicon spreadsheet: {e}")
        # Best-effort cleanup of the incomplete download, if one was created.
        try:
            os.remove(partial)
        except OSError:
            pass


def load_lexicon_data():
    """Rebuild the ``LEXICON_URLS`` cache from the lexicon spreadsheet.

    Reads the ``entries`` sheet of the workbook at
    ``settings.LEXICON_FILEPATH``. The first row holds the column headers;
    the columns ``slug``, ``ull``, ``ull-url``, ``ccr`` and ``ccr-url`` are
    used. For every row with a slug, ``LEXICON_URLS[slug]`` is set to a list
    with one ``{'vocab', 'term', 'url'}`` dict per vocabulary that has a term
    in that row (possibly an empty list).

    Rows without a slug (e.g. blank rows in the sheet) are skipped, and
    entries from a previous load are discarded so the cache mirrors the file.

    Raises whatever ``openpyxl`` raises when the workbook cannot be opened,
    and ``KeyError`` when the sheet or one of the expected columns is missing.
    """
    workbook = openpyxl.load_workbook(settings.LEXICON_FILEPATH)
    sheet = workbook['entries']
    # Map each header in the first row to its column index.
    headers = {cell.value: i for i, cell in enumerate(sheet[1])}

    # Mutate the module-level dict in place so existing references stay valid.
    LEXICON_URLS.clear()
    for row in sheet.iter_rows(min_row=2, values_only=True):  # Skip header
        slug = row[headers['slug']]
        if not slug:
            continue
        # Store terms and urls if they exist
        LEXICON_URLS[slug] = [
            {
                'vocab': vocab,
                'term': row[headers[vocab]],
                'url': row[headers[f'{vocab}-url']],
            }
            for vocab in ('ull', 'ccr')
            if row[headers[vocab]]
        ]


def process_continuation_annotations() -> None:
Expand Down Expand Up @@ -60,6 +102,7 @@ def process_continuation_annotations() -> None:


def create_lidiaterm(lexiconterm: str, customterm: str) -> Optional[LidiaTerm]:
urls_data = None
if not lexiconterm:
return None
if lexiconterm == 'custom':
Expand All @@ -68,12 +111,16 @@ def create_lidiaterm(lexiconterm: str, customterm: str) -> Optional[LidiaTerm]:
vocab = 'custom'
term = customterm
else:
vocab = 'lol'
vocab = 'lidia'
term = lexiconterm
if term in LEXICON_URLS:
urls_data = LEXICON_URLS[term]
lidiaterm, _ = LidiaTerm.objects.get_or_create(
vocab=vocab,
term=term
term=term,
defaults={'urls': urls_data}
)

return lidiaterm


Expand Down Expand Up @@ -115,6 +162,9 @@ def create_term_group(annotation: Annotation, index: int, data: dict) -> TermGro


def populate():
fetch_lexicon_data()
load_lexicon_data() # Load LEXICON_URLS global

for pub in syncmodels.Publication.objects.iterator():
with transaction.atomic():
zotero_id = pub.zotero_id
Expand Down
1 change: 1 addition & 0 deletions requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ pyzotero
Django
django-environ
iso639-lang>=2.1.0
openpyxl
4 changes: 4 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,16 @@ django==4.2.7
# via -r requirements.in
django-environ==0.11.2
# via -r requirements.in
et-xmlfile==1.1.0
# via openpyxl
feedparser==6.0.10
# via pyzotero
idna==3.6
# via requests
iso639-lang==2.2.1
# via -r requirements.in
openpyxl==3.1.2
# via -r requirements.in
pyparsing==3.1.1
# via bibtexparser
python-dotenv==1.0.0
Expand Down

0 comments on commit 05ecdaa

Please sign in to comment.