Skip to content

Commit

Permalink
Everything is working
Browse files Browse the repository at this point in the history
  • Loading branch information
Carlos González Gamella committed Nov 28, 2024
1 parent 68c5e46 commit 6d4850b
Show file tree
Hide file tree
Showing 7 changed files with 6,225 additions and 4,573 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -182,4 +182,5 @@ LinQAChecker-saved.json
minors_madrid/
minors_zaragoza/
DESCARGAS/
sproc/DESCARGA_PLACE_NOV

10,527 changes: 6,040 additions & 4,487 deletions descarga_minors/parsing_zgz_mad_2_place.ipynb

Large diffs are not rendered by default.

250 changes: 169 additions & 81 deletions integracion_opendata.py

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions nbs/40_io.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -1599,7 +1599,7 @@
" \"Homogenizes the elements in a list such that they are all either `float` or `str`\"\n",
" \n",
" # *scalar* Pandas' `pd.NA` are turned into Numpy's `np.nan`\n",
" l = [np.NAN if (type(e) != list) and (pd.isna(e)) else e for e in l]\n",
" l = [np.nan if (type(e) != list) and (pd.isna(e)) else e for e in l]\n",
" \n",
" try:\n",
" return [float(e) for e in l]\n",
Expand Down Expand Up @@ -1636,7 +1636,7 @@
}
],
"source": [
"cast_list_to_floats_or_strs([np.NAN, '14.1'])"
"cast_list_to_floats_or_strs([np.nan, '14.1'])"
]
},
{
Expand Down Expand Up @@ -1665,7 +1665,7 @@
}
],
"source": [
"cast_list_to_floats_or_strs([np.NAN, '14.1', 'hola'])"
"cast_list_to_floats_or_strs([np.nan, '14.1', 'hola'])"
]
},
{
Expand Down
5 changes: 4 additions & 1 deletion run_all.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from sproc.core import dl, cli_read_single_zip, cli_rename_columns, read_zips, cli_extend_parquet_with_zip

import argparse
import pathlib
import sys
import yaml

# Script to run the different actions of the sproc package
# It is recommended to run this script from the command line with the following command:
# python3 run_all.py --contract_type outsiders --save_path /path/to/save/place/data

def main():
parser = argparse.ArgumentParser(description="sproc")
#Para tener control sobre las acciones a realizar. Se configura el arg "option"
Expand Down
5 changes: 5 additions & 0 deletions sproc/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,7 @@ def dl(

# Define las columnas objetivo
columna_objetivo = (
'ContractFolderStatus.ContractFolderID',
'ContractFolderStatus.TenderingTerms.FundingProgram',
'ContractFolderStatus.TenderingTerms.ProcurementNationalLegislationCode'
)
Expand All @@ -339,6 +340,10 @@ def dl(
parquet_df[col_original] = parquet_df[col_original].astype('string')
else:
print(f"La columna '{col_obj}' no se está en el DataFrame. Pero la descarga se ha completado correctamente")

#import pdb; pdb.set_trace()

print(parquet_df.columns)
# parquet_df.to_parquet(output_file.with_stem('new'))
parquet_df.to_parquet(output_file)

Expand Down
4 changes: 3 additions & 1 deletion sproc/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ def cast_list_to_floats_or_strs(l: # Input
"Homogenizes the elements in a list such that they are all either `float` or `str`"

# *scalar* Pandas' `pd.NA` are turned into Numpy's `np.nan`
l = [np.NAN if (type(e) != list) and (pd.isna(e)) else e for e in l]
#l = [np.NAN if (type(e) != list) and (pd.isna(e)) else e for e in l] DEPRECATED VERSION OF NUMPY NAN (Updated 14/11/2024)-> np.nan
l = [np.nan if (type(e) != list) and (pd.isna(e)) else e for e in l]


try:
return [float(e) for e in l]
Expand Down

0 comments on commit 6d4850b

Please sign in to comment.