Skip to content

Commit

Permalink
fixed a bug
Browse files Browse the repository at this point in the history
  • Loading branch information
Thomas Hanke committed Feb 20, 2023
1 parent 353af89 commit 1900207
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 34 deletions.
37 changes: 6 additions & 31 deletions annotator.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,21 +277,6 @@ def get_num_header_rows_and_dataframe(self, file_data, separator_string, header_
skiprows=header_length, encoding=encoding)
except:
table_data=pd.DataFrame()
#print(table_data)
# good_readout = False
# while not good_readout:
# file_string.seek(0)
# # table_data = pd.read_csv(file_string, header=list(range(num_header_rows)), sep=separator_string,
# skiprows=header_length, encoding=encoding)

# # test if all text values in first table row -> is a second header row
# all_text = all([self.get_value_type(
# value) == 'TEXT' for column, value in table_data.iloc[0].items()])
# if all_text:
# num_header_rows += 1
# continue
# else:
# good_readout = True
return num_header_rows, table_data

def get_unit(self, string):
Expand Down Expand Up @@ -508,8 +493,6 @@ def process_file(self, file_name, file_data, separator, header_separator, encodi
metadata_csvw["url"] = file_name
data_table_header_row_index, data_table_column_count = self.get_table_charateristics(
file_data, separator, encoding)
#print(data_table_header_row_index)
# print(data_table_header_row_index, data_table_column_count)
# read additional header lines and provide as meta in results dict
if data_table_header_row_index != 0:
header_data = self.get_additional_header(
Expand All @@ -519,14 +502,12 @@ def process_file(self, file_name, file_data, separator, header_separator, encodi
# print("serialze additinal header")
metadata_csvw["notes"] = self.serialize_header(
header_data, filename=file_name)
#print(metadata_csvw["notes"])
# read tabular data structure, and determine number of header lines for column description used
header_lines, table_data = self.get_num_header_rows_and_dataframe(
file_data, separator, data_table_header_row_index, encoding)
# describe dialect
metadata_csvw["dialect"] = {"delimiter": separator,
"skipRows": data_table_header_row_index, "headerRowCount": header_lines, "encoding": encoding}
#print(metadata_csvw["dialect"])
# describe columns
if not table_data.empty:
column_json = list()
Expand Down Expand Up @@ -573,22 +554,16 @@ def process_file(self, file_name, file_data, separator, header_separator, encodi

if self.include_table_data:
#serialize row of the table data
#print(metadata_csvw["tableSchema"]["columns"][0]['name'])
columns_names=[item['name'] for item in metadata_csvw["tableSchema"]["columns"] if item['name']!='GID']
#set names of columns same as in metadata
table_data.columns=columns_names
#table_data.insert(0,'@id',data_table_header_row_index+header_lines+table_data.index)
table_data.insert(0,'@id',table_data.index)
#table_data.insert(1,'rownum',table_data.index)
#table_data['@id']=file_name+'#row='+table_data['@id'].astype(str)
table_data.insert(0,'url',data_table_header_row_index+header_lines+table_data.index)
table_data.insert(1,'rownum',table_data.index)
table_data.insert(2,'@id',table_data.index)
table_data['@id']='gid-'+table_data['@id'].astype(str)
table_entrys=list()
for index, record in enumerate(table_data.to_dict('records')):
record_dict=dict()
record_dict['url']=file_name+'#row='+str(data_table_header_row_index+header_lines+int(record['@id'][4:]))
record_dict['rownum']=index
record_dict['describes']=[record]
table_entrys.append(record_dict)
table_data['url']=file_name+'#row='+table_data['url'].astype(str)
table_entrys=[{'url': record.pop('url'), 'rownum': record.pop('rownum'), 'discribes':record}
for record in table_data.to_dict('records')]
metadata_csvw["row"] =table_entrys
result = json.dumps(metadata_csvw, indent=4)
meta_file_name = file_name.split(sep='.')[0] + '-metadata.json'
Expand Down
16 changes: 14 additions & 2 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from starlette.middleware import Middleware
from starlette.middleware.sessions import SessionMiddleware
from starlette.middleware.cors import CORSMiddleware
from starlette.concurrency import run_in_threadpool
from typing import Optional, Any

from pydantic import BaseSettings, BaseModel, AnyUrl, Field
Expand Down Expand Up @@ -161,10 +162,11 @@ async def index(request: Request):
start_form.data_url.data=start_form.data_url.render_kw['placeholder']
flash(request,'URL Data File empty: using placeholder value for demonstration','info')
try:
meta_file_name, result = annotator.process(
start_form.data_url.data)
meta_file_name, result = await run_in_threadpool(annotator.process, start_form.data_url.data)
except (ValueError, TypeError) as error:
flash(request,str(error),'error')
meta_file_name=''
payload=''
else:
b64 = base64.b64encode(result.encode())
payload = b64.decode()
Expand Down Expand Up @@ -196,6 +198,16 @@ async def api(annotate: AnnotateRequest) -> dict:
async def info() -> dict:
return settings

# Time HTTP calls: report how long each request took via a response header.
# `time` stays imported so the module-level name bound by the original import
# line remains available; `perf_counter` is what the middleware itself uses.
from time import perf_counter, time


@app.middleware("http")
async def add_process_time_header(request: Request, call_next):
    """Attach the request's processing duration to the response.

    Args:
        request: The incoming HTTP request.
        call_next: Awaitable callable that receives the request and returns
            the response.

    Returns:
        The response returned by ``call_next``, with an added
        ``X-Process-Time`` header holding the elapsed seconds as a string.
    """
    # perf_counter() is monotonic and high-resolution, so the measured
    # duration cannot go negative or jump if the system clock is adjusted
    # while the request is in flight (which wall-clock time() allows).
    start_time = perf_counter()
    response = await call_next(request)
    process_time = perf_counter() - start_time
    response.headers["X-Process-Time"] = str(process_time)
    return response

if __name__ == "__main__":
port = int(os.environ.get("PORT", 5000))
app_mode=os.environ.get("APP_MODE") or 'production'
Expand Down
3 changes: 2 additions & 1 deletion templates/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

{% block head %}
{{ super() }}

<link rel="shortcut icon" href="#">
<link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
{% endblock %}
Expand Down Expand Up @@ -84,7 +85,7 @@ <h5 class="card-title">Result</h5>
</div>
</main>
<footer class="pt-3 my-4 text-center text-muted border-top">
<span class="site-footer-owner"><a href="https://github.com/Mat-O-Lab/CSVtoCSVW">CSVtoCSVW</a> is maintained by <a href="https://matolab.org"><img src="{{ logo }}" width="5%" alt="mat-o-lab-logo"/></a>.</span>
<span class="site-footer-owner"><a href="https://github.com/Mat-O-Lab/CSVtoCSVW">CSVtoCSVW</a> is maintained by <a href="https://matolab.org"><img src="{{ url_for('static', path='/resources/MatOLab-Logo.svg') }}" width="5%" alt="mat-o-lab-logo"/></a>.</span>
</footer>
{% endblock %}

Expand Down

0 comments on commit 1900207

Please sign in to comment.