Merge branch 'accessibility' into 9868_acc_reflow_related_resources
phudson-he committed Oct 19, 2023
2 parents bb08aea + 0d667b1 commit 64cb69e
Showing 91 changed files with 1,641 additions and 1,233 deletions.
12 changes: 6 additions & 6 deletions arches/app/datatypes/datatypes.py
@@ -1651,8 +1651,8 @@ def transform_value_for_tile(self, value, **kwargs):
Accepts a comma delimited string of file paths as 'value' to create a file datatype value
with corresponding file record in the files table for each path. Only the basename of each path is used, so
the accuracy of the full path is not important. However the name of each file must match the name of a file in
the directory from which Arches will request files. By default, this is the 'uploadedfiles' directory
in a project.
the directory from which Arches will request files. By default, this is the directory in a project as defined
in settings.UPLOADED_FILES_DIR.
"""

@@ -1671,15 +1671,15 @@ def transform_value_for_tile(self, value, **kwargs):
tile_file["name"] = os.path.basename(file_path)
tile_file["type"] = mime.guess_type(file_path)[0]
tile_file["type"] = "" if tile_file["type"] is None else tile_file["type"]
file_path = "uploadedfiles/" + str(tile_file["name"])
file_path = "%s/%s" % (settings.UPLOADED_FILES_DIR, str(tile_file["name"]))
tile_file["file_id"] = str(uuid.uuid4())
if source_path:
source_file = os.path.join(source_path, tile_file["name"])
fs = default_storage
try:
with default_storage.open(source_file) as f:
current_file, created = models.File.objects.get_or_create(fileid=tile_file["file_id"])
filename = fs.save(os.path.join("uploadedfiles", os.path.basename(f.name)), File(f))
filename = fs.save(os.path.join(settings.UPLOADED_FILES_DIR, os.path.basename(f.name)), File(f))
current_file.path = os.path.join(filename)
current_file.save()
tile_file["size"] = current_file.path.size
@@ -1705,7 +1705,7 @@ def pre_tile_save(self, tile, nodeid):
if file["file_id"]:
if file["url"] == f'{settings.MEDIA_URL}{file["file_id"]}':
val = uuid.UUID(file["file_id"]) # to test if file_id is uuid
file_path = "uploadedfiles/" + file["name"]
file_path = "%s/%s" % (settings.UPLOADED_FILES_DIR, file["name"])
try:
file_model = models.File.objects.get(pk=file["file_id"])
except ObjectDoesNotExist:
@@ -1723,7 +1723,7 @@ def pre_tile_save(self, tile, nodeid):
logger.warning(_("This file's fileid is not a valid UUID"))

def transform_export_values(self, value, *args, **kwargs):
return ",".join([settings.MEDIA_URL + "uploadedfiles/" + str(file["name"]) for file in value])
return ",".join([settings.MEDIA_URL + settings.UPLOADED_FILES_DIR + "/" + str(file["name"]) for file in value])

def is_a_literal_in_rdf(self):
return False
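
The recurring edit in this file replaces the hardcoded "uploadedfiles" prefix with settings.UPLOADED_FILES_DIR, so projects can relocate the upload directory through configuration. A minimal sketch of the resulting pattern, assuming UPLOADED_FILES_DIR still resolves to "uploadedfiles" unless a project overrides it (the save_uploaded_file helper is illustrative, not part of the commit):

import os

from django.core.files import File
from django.core.files.storage import default_storage

from arches.app.models.system_settings import settings


def save_uploaded_file(file_obj):
    # Build the storage path from the configurable directory instead of the
    # literal string "uploadedfiles"; only the basename of the incoming file
    # is kept, mirroring transform_value_for_tile above.
    relative_path = os.path.join(settings.UPLOADED_FILES_DIR, os.path.basename(file_obj.name))
    # default_storage decides where the bytes actually land (local disk, S3, ...).
    return default_storage.save(relative_path, File(file_obj))
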
157 changes: 89 additions & 68 deletions arches/app/etl_modules/base_data_editor.py
@@ -8,8 +8,11 @@
from django.utils.decorators import method_decorator
from django.utils.translation import gettext as _
from arches.app.datatypes.datatypes import DataTypeFactory
from arches.app.models.models import GraphModel, Node
from arches.app.models.models import GraphModel, Node, ETLModule
from arches.app.models.system_settings import settings
from arches.app.search.elasticsearch_dsl_builder import Bool, Exists, FiltersAgg, Nested, NestedAgg, Query, Wildcard
from arches.app.search.mappings import RESOURCES_INDEX
from arches.app.search.search_engine_factory import SearchEngineFactory
import arches.app.tasks as tasks
from arches.app.etl_modules.decorators import load_data_async
from arches.app.etl_modules.save import save_to_tiles
@@ -114,12 +117,13 @@ def create_load_event(self, cursor, load_details):

return result

def stage_data(self, cursor, graph_id, node_id, resourceids, operation, text_replacing, language_code, case_insensitive):
def stage_data(self, cursor, module_id, graph_id, node_id, resourceids, operation, text_replacing, language_code, case_insensitive):
result = {"success": False}
update_limit = ETLModule.objects.get(pk=module_id).config["updateLimit"]
try:
cursor.execute(
"""SELECT * FROM __arches_stage_string_data_for_bulk_edit(%s, %s, %s, %s, %s, %s, %s, %s, %s)""",
(self.loadid, graph_id, node_id, self.moduleid, (resourceids), operation, text_replacing, language_code, case_insensitive),
"""SELECT * FROM __arches_stage_string_data_for_bulk_edit(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
(self.loadid, graph_id, node_id, self.moduleid, (resourceids), operation, text_replacing, language_code, case_insensitive, update_limit),
)
result["success"] = True
except Exception as e:
@@ -164,73 +168,90 @@ def edit_staged_data(self, cursor, graph_id, node_id, operation, language_code,
result["message"] = _("Unable to edit staged data: {}").format(str(e))
return result

def get_preview_data(self, graph_id, node_id, resourceids, language_code, old_text, case_insensitive):
node_id_query = " AND nodeid = %(node_id)s" if node_id else ""
graph_id_query = " AND graphid = %(graph_id)s" if graph_id else ""
resourceids_query = " AND resourceinstanceid IN %(resourceids)s" if resourceids else ""
like_operator = "ilike" if case_insensitive == "true" else "like"
old_text_like = "%" + old_text + "%" if old_text else ""
text_query = (
" AND t.tiledata -> %(node_id)s -> %(language_code)s ->> 'value' " + like_operator + " %(old_text)s" if old_text else ""
)
def get_preview_data(self, node_id, search_url, language_code, operation, old_text, case_insensitive):
request = HttpRequest()
request.user = self.request.user
request.method = "GET"
request.GET["paging-filter"] = 1
request.GET["tiles"] = True

if language_code is None:
language_code = "en"

request_parmas_dict = {
"node_id": node_id,
"language_code": language_code,
"graph_id": graph_id,
"resourceids": resourceids,
"old_text": old_text_like,
if search_url:
params = parse_qs(urlsplit(search_url).query)
for k, v in params.items():
request.GET.__setitem__(k, v[0])

search_url_query = search_results(request, returnDsl=True).dsl["query"]
case_insensitive = True if case_insensitive == "true" else False

if old_text:
search_query = Wildcard(
field=f"tiles.data.{node_id}.{language_code}.value.keyword",
query=f"*{old_text}*",
case_insensitive=case_insensitive,
)
search_bool_agg = Bool()
search_bool_agg.must(search_query)

else:
if operation.startswith("upper"):
regexp = "(.*[a-z].*)"
elif operation.startswith("lower"):
regexp = "(.*[A-Z].*)"
elif operation.startswith("capitalize"):
regexp = "([a-z].*)|([A-Z][a-zA-Z]*[A-Z].*)|((.+[ ]+)[a-z].*)|((.+[ ]+)[A-Z][a-zA-Z]*[A-Z].*)"
elif operation.startswith("trim"):
regexp = "[ \t].*|.*[ \t]"
case_search_query = {
"regexp": {
f"tiles.data.{str(node_id)}.{language_code}.value.keyword": {
"value": regexp
}
}
}
search_query = Bool()
search_query.must(case_search_query)
search_bool_agg = Bool()
search_bool_agg.must(case_search_query)

string_search_nested = Nested(path="tiles", query=search_query)
inner_hits_query = {
"inner_hits": {
"_source": False,
"docvalue_fields": [ f"tiles.data.{node_id}.{language_code}.value.keyword" ]
}
}
string_search_nested.dsl["nested"].update(inner_hits_query)

sql_query = (
"""
SELECT t.tiledata -> %(node_id)s -> %(language_code)s ->> 'value' FROM tiles t, nodes n
WHERE t.nodegroupid = n.nodegroupid
"""
+ node_id_query
+ graph_id_query
+ resourceids_query
+ text_query
+ " LIMIT 5;"
)
search_bool_query = Bool()
search_bool_query.must(string_search_nested)

tile_count_query = (
"""
SELECT count(t.tileid) FROM tiles t, nodes n
WHERE t.nodegroupid = n.nodegroupid
"""
+ node_id_query
+ graph_id_query
+ resourceids_query
+ text_query
)
search_url_query["bool"]["must"].append(search_bool_query.dsl)

resource_count_query = (
"""
SELECT count(DISTINCT t.resourceinstanceid) FROM tiles t, nodes n
WHERE t.nodegroupid = n.nodegroupid
"""
+ node_id_query
+ graph_id_query
+ resourceids_query
+ text_query
)
search_filter_agg = FiltersAgg(name="string_search")
search_filter_agg.add_filter(search_bool_agg)

with connection.cursor() as cursor:
cursor.execute(sql_query, request_parmas_dict)
row = [value[0] for value in cursor.fetchall()]
nested_agg = NestedAgg(path="tiles", name="tile_agg")
nested_agg.add_aggregation(search_filter_agg)

se = SearchEngineFactory().create()
query = Query(se, limit=5)

cursor.execute(tile_count_query, request_parmas_dict)
count = cursor.fetchall()
(number_of_tiles,) = count[0]
query.add_query(search_url_query)
query.add_aggregation(nested_agg)

cursor.execute(resource_count_query, request_parmas_dict)
count = cursor.fetchall()
(number_of_resources,) = count[0]
results = query.search(index=RESOURCES_INDEX)
values = []
for hit in results['hits']['hits']:
for tile in hit['inner_hits']['tiles']['hits']['hits']:
values.append(tile['fields'][f"tiles.data.{node_id}.{language_code}.value.keyword"][0])

return row, number_of_tiles, number_of_resources
number_of_resources = results['hits']['total']['value']
number_of_tiles = results["aggregations"]["tile_agg"]["string_search"]["buckets"][0]["doc_count"]

return values[:5], number_of_tiles, number_of_resources

def preview(self, request):
graph_id = request.POST.get("graph_id", None)
@@ -251,13 +272,13 @@ def preview(self, request):
if resourceids:
resourceids = tuple(resourceids)

if case_insensitive == "true" and operation == "replace":
if case_insensitive and operation == "replace":
operation = "replace_i"
if also_trim == "true":
operation = operation + "_trim"

first_five_values, number_of_tiles, number_of_resources = self.get_preview_data(
graph_id, node_id, resourceids, language_code, old_text, case_insensitive
node_id, search_url, language_code, operation, old_text, case_insensitive
)
return_list = []
with connection.cursor() as cursor:
@@ -320,7 +341,7 @@ def write(self, request):
}

first_five_values, number_of_tiles, number_of_resources = self.get_preview_data(
graph_id, node_id, resourceids, language_code, old_text, case_insensitive
node_id, search_url, language_code, operation, old_text, case_insensitive
)

load_details = {
Expand All @@ -340,7 +361,7 @@ def write(self, request):
if use_celery_bulk_edit:
response = self.run_load_task_async(request, self.loadid)
else:
response = self.run_load_task(self.loadid, graph_id, node_id, operation, language_code, old_text, new_text, resourceids)
response = self.run_load_task(self.userid, self.loadid, self.moduleid, graph_id, node_id, operation, language_code, old_text, new_text, resourceids)
else:
self.log_event(cursor, "failed")
return {"success": False, "data": event_created["message"]}
@@ -371,23 +392,23 @@ def run_load_task_async(self, request):
operation = operation + "_trim"

edit_task = tasks.edit_bulk_string_data.apply_async(
(self.loadid, graph_id, node_id, operation, language_code, old_text, new_text, resourceids, self.userid),
(self.userid, self.loadid, self.moduleid, graph_id, node_id, operation, language_code, old_text, new_text, resourceids),
)
with connection.cursor() as cursor:
cursor.execute(
"""UPDATE load_event SET taskid = %s WHERE loadid = %s""",
(edit_task.task_id, self.loadid),
)

def run_load_task(self, loadid, graph_id, node_id, operation, language_code, old_text, new_text, resourceids):
def run_load_task(self, userid, loadid, module_id, graph_id, node_id, operation, language_code, old_text, new_text, resourceids):
if resourceids:
resourceids = [uuid.UUID(id) for id in resourceids]
case_insensitive = False
if operation == "replace_i":
case_insensitive = True

with connection.cursor() as cursor:
data_staged = self.stage_data(cursor, graph_id, node_id, resourceids, operation, old_text, language_code, case_insensitive)
data_staged = self.stage_data(cursor, module_id, graph_id, node_id, resourceids, operation, old_text, language_code, case_insensitive)

if data_staged["success"]:
data_updated = self.edit_staged_data(cursor, graph_id, node_id, operation, language_code, old_text, new_text)
@@ -397,7 +418,7 @@ def run_load_task(self, loadid, graph_id, node_id, operation, language_code, old

if data_updated["success"]:
self.loadid = loadid # currently redundant, but be certain
data_updated = save_to_tiles(loadid, finalize_import=False)
data_updated = save_to_tiles(userid, loadid, finalize_import=False)
return {"success": True, "data": "done"}
else:
with connection.cursor() as cursor:
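
The largest change in this file reworks get_preview_data: instead of running SQL against the tiles table, it now issues an Elasticsearch query built with the DSL helpers imported at the top of the module, and reads the preview values plus the tile and resource counts from the search response. A minimal sketch of that query shape, assuming a plain case-insensitive wildcard search on one node's value (the node id and search text are placeholders, and the merging of the optional search_url filters is omitted):

from arches.app.search.elasticsearch_dsl_builder import (
    Bool,
    FiltersAgg,
    Nested,
    NestedAgg,
    Query,
    Wildcard,
)
from arches.app.search.mappings import RESOURCES_INDEX
from arches.app.search.search_engine_factory import SearchEngineFactory

node_id = "a1b2c3d4-0000-0000-0000-000000000000"  # placeholder node id
language_code = "en"
old_text = "example"  # placeholder search text

# Wildcard match on the node's keyword value inside the nested tiles documents.
wildcard = Wildcard(
    field=f"tiles.data.{node_id}.{language_code}.value.keyword",
    query=f"*{old_text}*",
    case_insensitive=True,
)
tile_match = Bool()
tile_match.must(wildcard)

# Nested query with inner_hits so the matching tile values come back for the preview.
nested = Nested(path="tiles", query=tile_match)
nested.dsl["nested"].update(
    {
        "inner_hits": {
            "_source": False,
            "docvalue_fields": [f"tiles.data.{node_id}.{language_code}.value.keyword"],
        }
    }
)
outer = Bool()
outer.must(nested)

# Nested filters aggregation to count matching tiles, not just matching resources.
tile_filter = FiltersAgg(name="string_search")
tile_filter.add_filter(tile_match)
tile_agg = NestedAgg(path="tiles", name="tile_agg")
tile_agg.add_aggregation(tile_filter)

query = Query(SearchEngineFactory().create(), limit=5)
query.add_query(outer.dsl)
query.add_aggregation(tile_agg)

results = query.search(index=RESOURCES_INDEX)
values = [
    tile["fields"][f"tiles.data.{node_id}.{language_code}.value.keyword"][0]
    for hit in results["hits"]["hits"]
    for tile in hit["inner_hits"]["tiles"]["hits"]["hits"]
]
number_of_resources = results["hits"]["total"]["value"]
number_of_tiles = results["aggregations"]["tile_agg"]["string_search"]["buckets"][0]["doc_count"]
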
15 changes: 8 additions & 7 deletions arches/app/etl_modules/base_import_module.py
@@ -15,6 +15,7 @@

from arches.app.etl_modules.save import save_to_tiles
from arches.app.models.models import Node
from arches.app.models.system_settings import settings
from arches.app.utils.decorators import user_created_transaction_match
from arches.app.utils.file_validator import FileValidator
from arches.app.utils.transaction import reverse_edit_log_entries
@@ -156,7 +157,7 @@ def get_node_lookup(self, nodes):
lookup[node.alias] = {"nodeid": str(node.nodeid), "datatype": node.datatype, "config": node.config}
return lookup

def run_load_task(self, files, summary, result, temp_dir, loadid):
def run_load_task(self, userid, files, summary, result, temp_dir, loadid):
with connection.cursor() as cursor:
for file in files.keys():
self.stage_excel_file(file, summary, cursor)
@@ -173,7 +174,7 @@ def run_load_task(self, files, summary, result, temp_dir, loadid):
result["validation"] = self.validate(loadid)
if len(result["validation"]["data"]) == 0:
self.loadid = loadid # currently redundant, but be certain
save_to_tiles(loadid, multiprocessing=False)
save_to_tiles(userid, loadid, multiprocessing=False)
else:
cursor.execute(
"""UPDATE load_event SET status = %s, load_end_time = %s WHERE loadid = %s""",
@@ -204,7 +205,7 @@ def read(self, request):
self.loadid = request.POST.get("load_id")
self.cumulative_excel_files_size = 0
content = request.FILES["file"]
self.temp_dir = os.path.join("uploadedfiles", "tmp", self.loadid)
self.temp_dir = os.path.join(settings.UPLOADED_FILES_DIR, "tmp", self.loadid)
try:
self.delete_from_default_storage(self.temp_dir)
except (FileNotFoundError):
@@ -254,7 +255,7 @@ def read(self, request):

def start(self, request):
self.loadid = request.POST.get("load_id")
self.temp_dir = os.path.join("uploadedfiles", "tmp", self.loadid)
self.temp_dir = os.path.join(settings.UPLOADED_FILES_DIR, "tmp", self.loadid)
result = {"started": False, "message": ""}
with connection.cursor() as cursor:
try:
@@ -270,7 +271,7 @@ def start(self, request):

def write(self, request):
self.loadid = request.POST.get("load_id")
self.temp_dir = os.path.join("uploadedfiles", "tmp", self.loadid)
self.temp_dir = os.path.join(settings.UPLOADED_FILES_DIR, "tmp", self.loadid)
self.file_details = request.POST.get("load_details", None)
result = {}
if self.file_details:
@@ -281,10 +282,10 @@ def write(self, request):
if summary["cumulative_excel_files_size"] / 1000000 > use_celery_file_size_threshold_in_MB:
response = self.run_load_task_async(request, self.loadid)
else:
response = self.run_load_task(files, summary, result, self.temp_dir, self.loadid)
response = self.run_load_task(self.userid, files, summary, result, self.temp_dir, self.loadid)

return response

class FileValidationError(Exception):
def __init__(self, message=_("Unable to read file"), code=400):
self.title = _("Invalid Uploaded File")
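
Across these ETL modules the acting user is now threaded through explicitly: run_load_task takes userid and passes it on to save_to_tiles, which receives it ahead of the load id. A minimal sketch of the new calling convention, assuming an import-module instance that already tracks userid and loadid (the finish_load helper is illustrative, not part of the commit):

from django.db import connection

from arches.app.etl_modules.save import save_to_tiles


def finish_load(importer):
    # Illustrative wrapper: validate the staged load, then write tiles with the
    # post-commit signature, passing the user id ahead of the load id.
    validation = importer.validate(importer.loadid)
    if len(validation["data"]) == 0:
        return save_to_tiles(importer.userid, importer.loadid, multiprocessing=False)
    with connection.cursor() as cursor:
        # Simplified failure path; the module above also records load_end_time.
        cursor.execute(
            """UPDATE load_event SET status = %s WHERE loadid = %s""",
            ("failed", importer.loadid),
        )
    return {"success": False, "data": validation["data"]}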