fixed a bug

Mat-O-Lab · Feb 20, 2023 · 1900207 · 1900207
1 parent 353af89
commit 1900207
Show file tree

Hide file tree

Showing 3 changed files with 22 additions and 34 deletions.
diff --git a/annotator.py b/annotator.py
@@ -277,21 +277,6 @@ def get_num_header_rows_and_dataframe(self, file_data, separator_string, header_
             skiprows=header_length, encoding=encoding)
         except:
             table_data=pd.DataFrame()
-        #print(table_data)
-        # good_readout = False
-        # while not good_readout:
-        #     file_string.seek(0)
-        #     # table_data = pd.read_csv(file_string, header=list(range(num_header_rows)), sep=separator_string,
-        #                              skiprows=header_length, encoding=encoding)
-
-        #     # test if all text values in first table row -> is a second header row
-        #     all_text = all([self.get_value_type(
-        #         value) == 'TEXT' for column, value in table_data.iloc[0].items()])
-        #     if all_text:
-        #         num_header_rows += 1
-        #         continue
-        #     else:
-        #         good_readout = True
         return num_header_rows, table_data
 
     def get_unit(self, string):
@@ -508,8 +493,6 @@ def process_file(self, file_name, file_data, separator, header_separator, encodi
         metadata_csvw["url"] = file_name
         data_table_header_row_index, data_table_column_count = self.get_table_charateristics(
             file_data, separator, encoding)
-        #print(data_table_header_row_index)
-        # print(data_table_header_row_index, data_table_column_count)
         # read additional header lines and provide as meta in results dict
         if data_table_header_row_index != 0:
             header_data = self.get_additional_header(
@@ -519,14 +502,12 @@ def process_file(self, file_name, file_data, separator, header_separator, encodi
                 # print("serialze additinal header")
                 metadata_csvw["notes"] = self.serialize_header(
                     header_data, filename=file_name)
-        #print(metadata_csvw["notes"])
         # read tabular data structure, and determine number of header lines for column description used
         header_lines, table_data = self.get_num_header_rows_and_dataframe(
             file_data, separator, data_table_header_row_index, encoding)
         # describe dialect
         metadata_csvw["dialect"] = {"delimiter": separator,
                                     "skipRows": data_table_header_row_index, "headerRowCount": header_lines, "encoding": encoding}
-        #print(metadata_csvw["dialect"])
         # describe columns
         if not table_data.empty:
             column_json = list()
@@ -573,22 +554,16 @@ def process_file(self, file_name, file_data, separator, header_separator, encodi
 
             if self.include_table_data:
                 #serialize row of the table data
-                #print(metadata_csvw["tableSchema"]["columns"][0]['name'])
                 columns_names=[item['name'] for item in metadata_csvw["tableSchema"]["columns"] if item['name']!='GID']
                 #set names of columns same as in metadata
                 table_data.columns=columns_names
-                #table_data.insert(0,'@id',data_table_header_row_index+header_lines+table_data.index)
-                table_data.insert(0,'@id',table_data.index)
-                #table_data.insert(1,'rownum',table_data.index)
-                #table_data['@id']=file_name+'#row='+table_data['@id'].astype(str)
+                table_data.insert(0,'url',data_table_header_row_index+header_lines+table_data.index)
+                table_data.insert(1,'rownum',table_data.index)
+                table_data.insert(2,'@id',table_data.index)
                 table_data['@id']='gid-'+table_data['@id'].astype(str)
-                table_entrys=list()
-                for index, record in enumerate(table_data.to_dict('records')):
-                    record_dict=dict()
-                    record_dict['url']=file_name+'#row='+str(data_table_header_row_index+header_lines+int(record['@id'][4:]))
-                    record_dict['rownum']=index
-                    record_dict['describes']=[record]
-                    table_entrys.append(record_dict)
+                table_data['url']=file_name+'#row='+table_data['url'].astype(str)
+                table_entrys=[{'url': record.pop('url'), 'rownum': record.pop('rownum'), 'discribes':record} 
+                for record in table_data.to_dict('records')]
                 metadata_csvw["row"] =table_entrys
         result = json.dumps(metadata_csvw, indent=4)
         meta_file_name = file_name.split(sep='.')[0] + '-metadata.json'

diff --git a/app.py b/app.py
@@ -9,6 +9,7 @@
 from starlette.middleware import Middleware
 from starlette.middleware.sessions import SessionMiddleware
 from starlette.middleware.cors import CORSMiddleware
+from starlette.concurrency import run_in_threadpool
 from typing import Optional, Any
 
 from pydantic import BaseSettings, BaseModel, AnyUrl, Field
@@ -161,10 +162,11 @@ async def index(request: Request):
             start_form.data_url.data=start_form.data_url.render_kw['placeholder']
             flash(request,'URL Data File empty: using placeholder value for demonstration','info')
         try:
-            meta_file_name, result = annotator.process(
-                start_form.data_url.data)
+            meta_file_name, result = await run_in_threadpool(annotator.process, start_form.data_url.data)
         except (ValueError, TypeError) as error:
             flash(request,str(error),'error')
+            meta_file_name=''
+            payload=''
         else:
             b64 = base64.b64encode(result.encode())
             payload = b64.decode()
@@ -196,6 +198,16 @@ async def api(annotate: AnnotateRequest) -> dict:
 async def info() -> dict:
     return settings
 
+# time each HTTP request and report the duration in the X-Process-Time response header
+from time import time
+@app.middleware("http")
+async def add_process_time_header(request: Request, call_next):
+    start_time = time()
+    response = await call_next(request)
+    process_time = time() - start_time
+    response.headers["X-Process-Time"] = str(process_time)
+    return response
+
 if __name__ == "__main__":
     port = int(os.environ.get("PORT", 5000))
     app_mode=os.environ.get("APP_MODE") or 'production'

diff --git a/templates/index.html b/templates/index.html
@@ -3,6 +3,7 @@
 
 {% block head %}
 {{ super() }}
+
 <link rel="shortcut icon" href="#">
 <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-1BmE4kWBq78iYhFldvKuhfTAU6auU8tT94WrHftjDbrCEXSU1oBoqyl2QvZ6jIW3" crossorigin="anonymous">
 {% endblock %}
@@ -84,7 +85,7 @@ <h5 class="card-title">Result</h5>
     </div>
   </main>
 <footer class="pt-3 my-4 text-center text-muted border-top">
-  <span class="site-footer-owner"><a href="https://github.com/Mat-O-Lab/CSVtoCSVW">CSVtoCSVW</a> is maintained by <a href="https://matolab.org"><img src="{{ logo }}" width="5%" alt="mat-o-lab-logo"/></a>.</span>
+  <span class="site-footer-owner"><a href="https://github.com/Mat-O-Lab/CSVtoCSVW">CSVtoCSVW</a> is maintained by <a href="https://matolab.org"><img src="{{ url_for('static', path='/resources/MatOLab-Logo.svg') }}" width="5%" alt="mat-o-lab-logo"/></a>.</span>
 </footer>
 {% endblock %}