diff --git a/annotator.py b/annotator.py index c6b6c0e..76cc711 100644 --- a/annotator.py +++ b/annotator.py @@ -5,7 +5,7 @@ import re, os import ast import json -from urllib.parse import urlparse, unquote +from urllib.parse import urlparse, unquote, quote from dateutil.parser import parse as date_parse from csv import Sniffer import requests @@ -337,6 +337,9 @@ def __init__( self.url, self.encoding, self.authorization ) self.file_domain = self.url.rsplit(self.file_name, 1)[0] + # use escaped name, no spaces allowed + self.file_name = quote(self.file_name) + self.meta_file_name = self.file_name.rsplit(".", 1)[0] + "-metadata.json" self.csv_namespace = self.file_domain + self.file_name + "/" self.context = [ @@ -376,9 +379,16 @@ def annotate(self) -> dict: self.result_dict = self.process_data() return self.result_dict - def convert(self, format: str) -> str: + def graph(self) -> Graph: g = Graph() g.parse(data=json.dumps(self.result_dict), format="json-ld") + # with open("test.json", "w") as f: + # json.dump(self.result_dict, f, indent=4) + return g + + def convert(self, format: str) -> str: + g = self.graph() + # g.serialize("test.ttl", format="json-ld") self.meta_file_name = self.meta_file_name.rsplit(".", 1)[0] if format in ["turtle", "longturtle"]: self.meta_file_name += ".ttl" diff --git a/app.py b/app.py index d53676b..863e25a 100644 --- a/app.py +++ b/app.py @@ -1,7 +1,7 @@ # app.py import os import base64 -from urllib.parse import urlparse +from urllib.parse import urlparse, quote import uvicorn from starlette_wtf import StarletteForm, CSRFProtectMiddleware, csrf_protect @@ -355,12 +355,13 @@ async def annotate_upload( # add prov o documentation result = {**result, **annotate_prov(request.url._url)} data = annotator.convert(format=return_type.value) + data_bytes = BytesIO(data.encode()) filename = annotator.meta_file_name # delete the temp csv file - if os.path.isfile(file.filename): - os.remove(file.filename) - return RDFStreamingResponse(content=data_bytes, filename=filename) + # if os.path.isfile(file.filename): + # os.remove(file.filename) + return RDFStreamingResponse(content=data_bytes, filename=quote(filename)) @app.post("/api/rdf", response_class=RDFStreamingResponse) diff --git a/csvw_parser.py b/csvw_parser.py index 2cd9f1d..2e31665 100644 --- a/csvw_parser.py +++ b/csvw_parser.py @@ -7,7 +7,7 @@ from rdflib.namespace import CSVW, RDF, XSD, PROV, RDFS, DC from datetime import datetime from urllib.request import urlopen -from urllib.parse import urlparse, unquote +from urllib.parse import urlparse, unquote, quote import io, os import logging import requests @@ -186,7 +186,7 @@ def __init__( # self.metagraph.serialize('metagraph.ttl') print("meta_root: " + self.meta_root) # print('csv_url: '+url) - self.base_url = "{}/".format(str(self.meta_root).rsplit("/", 1)[0]) + self.base_url = "{}/".format(quote(str(self.meta_root).rsplit("/", 1)[0])) parsed_url = urlparse(url) if parsed_url.scheme in ["https", "http", "file"]: self.csv_url = url