Skip to content

Commit

Permalink
Merge pull request #25 from Youssef-Harby/dev
Browse files Browse the repository at this point in the history
 fix: Error running OMDownloader #19
  • Loading branch information
Youssef-Harby authored Apr 26, 2024
2 parents 95107d2 + e849958 commit 124aabc
Show file tree
Hide file tree
Showing 9 changed files with 996 additions and 944 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/docker-build-push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ on:

jobs:
docker:
runs-on: self-hosted
runs-on: ubuntu-latest

env:
DOCKER_TAG: latest
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/poetry-pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ jobs:
publish:
runs-on: ubuntu-latest
container:
image: ghcr.io/osgeo/gdal:ubuntu-full-3.7.2
image: ghcr.io/osgeo/gdal:ubuntu-full-3.8.5

env:
POETRY_NO_INTERACTION: 1
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ARG GDAL_VERSION=3.8.4
ARG GDAL_VERSION=3.8.5
FROM ghcr.io/osgeo/gdal:ubuntu-full-${GDAL_VERSION}

LABEL maintainer="Youssef Harby <[email protected]>"
Expand Down
2 changes: 1 addition & 1 deletion config.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
global_variables:
release: "2024-02-15-alpha.0"
release: "2024-04-16-beta.0"
s3_region: "us-west-2"
default_theme: "places"
default_type: "*"
Expand Down
2 changes: 1 addition & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ services:
args:
- OS_ARCH=linux
- PLATFORM_ARCH=aarch64 # amd64, i386, rpi, aarch64
- GDAL_VERSION=3.7.2
- GDAL_VERSION=3.8.5
image: ghcr.io/youssef-harby/overturemapsdownloader:latest
command: ["jupyter", "notebook", "--port=8888", "--no-browser", "--ip=0.0.0.0", "--NotebookApp.token=''", "--allow-root"]
restart: unless-stopped
Expand Down
80 changes: 51 additions & 29 deletions overturemapsdownloader/dask_qrys.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
import logging

import dask

# The query-planning switch is read when ``dask.dataframe`` is first imported,
# so it has to be set before the dataframe-related imports below.
dask.config.set({"dataframe.query-planning": False})

import dask.dataframe as dd  # noqa: E402
import dask_geopandas as dgpd  # noqa: E402
import geopandas as gpd  # noqa: E402
from dask.diagnostics import ProgressBar  # noqa: E402
from dask.distributed import Client, LocalCluster  # noqa: E402
from shapely.geometry import Polygon, box  # noqa: E402

from overturemapsdownloader.utils_helper import read_geospatial_data  # noqa: E402

# Show a progress bar for every Dask computation triggered from this module.
ProgressBar().register()

# cluster = LocalCluster()
# client = Client(cluster)


def compute_dataframe(df):
    """
    Materialise a lazy collection by calling its ``compute()`` method.

    Args:
        df: Any object exposing ``compute()`` (e.g. a Dask DataFrame).

    Returns:
        Whatever ``df.compute()`` returns, or ``None`` if the computation
        raised; the error is logged rather than propagated.
    """
    try:
        return df.compute()
    except Exception as e:
        # Best-effort contract: callers check for None instead of catching.
        logging.error("Error computing DataFrame: %s", e)
        return None


def get_df_from_parquet(
parquet_path,
engine="pyarrow",
# columns=["geometry"], # comment to get all columns by default
storage_options={"anon": True},
parquet_file_extensions=False,
):
Expand All @@ -22,7 +36,6 @@ def get_df_from_parquet(
logging.info(f"Reading Parquet file from {parquet_path}")
df = dd.read_parquet(
parquet_path,
# columns=columns, # comment to get all columns by default
engine=engine,
index="id",
dtype_backend=engine,
Expand All @@ -35,48 +48,57 @@ def get_df_from_parquet(
return None


def make_gdf_from_df(df, crs="EPSG:4326"):
    """
    Converts a Dask DataFrame to a Dask GeoDataFrame.

    The ``geometry`` column is decoded from WKB partition by partition and the
    result is wrapped with ``dask_geopandas``.

    Args:
        df: Dask DataFrame that must contain a ``geometry`` column.
        crs: Coordinate reference system assigned to the resulting frame.

    Returns:
        The Dask GeoDataFrame, or ``None`` when the ``geometry`` column is
        missing or the conversion fails; the failure is logged.
    """
    try:
        if "geometry" not in df.columns:
            raise ValueError("Geometry column missing in DataFrame")

        # Decode the WKB payload of every partition into a GeoSeries.
        geometry = df["geometry"].map_partitions(
            gpd.GeoSeries.from_wkb, meta=gpd.GeoSeries()
        )

        # assign() builds a new frame, so the caller's DataFrame is not mutated.
        gdf = dgpd.from_dask_dataframe(
            df.assign(geometry=geometry), geometry="geometry"
        )
        gdf.crs = crs

        logging.info("Conversion successful, GeoDataFrame created.")
        return gdf
    except Exception as e:
        logging.error(f"Failed to convert DataFrame to GeoDataFrame: {str(e)}")
        return None


def get_clipped_gdf(gdf, bbox_filter):
    """
    Clips the GeoDataFrame using a bounding box.

    Args:
        gdf: Dask GeoDataFrame; it is computed in full before filtering.
        bbox_filter: Either a ``(minx, miny, maxx, maxy)`` tuple/list, a
            shapely geometry, or a GeoSeries whose first element is the
            clip geometry.

    Returns:
        The computed GeoDataFrame restricted to rows whose geometry lies
        within the clip geometry.
    """
    # Normalise every accepted input form to one shapely geometry.
    if isinstance(bbox_filter, (tuple, list)):
        clip_geom = box(*bbox_filter)
    elif isinstance(bbox_filter, gpd.GeoSeries):
        clip_geom = bbox_filter.iloc[0]
    else:
        clip_geom = bbox_filter

    # NOTE(review): computing before filtering pulls every partition into
    # memory; filtering lazily first may be cheaper - confirm before changing.
    local_gdf = gdf.compute()

    return local_gdf[local_gdf.geometry.within(clip_geom)]


if __name__ == "__main__":
    # Manual smoke run: read one theme from S3, convert it, clip to a bbox.
    logging.basicConfig(level=logging.INFO)

    # TODO: Handle columns with official schemas
    schema_yaml_path = "overturemapsdownloader/schemas/schema/places/place.yaml"

    bbox_filter = (31.429, 29.998, 31.531, 30.102)  # Example bbox coordinates

    df = get_df_from_parquet(
        parquet_path="s3://overturemaps-us-west-2/release/2023-07-26-alpha.0/theme=places/type=*/*",
        # columns=get_columns_from_om_schema_yaml(schema_yaml_path),
    )

    if df is None:
        logging.error("Could not read the DataFrame from the Parquet file.")
    else:
        gdf = make_gdf_from_df(df)

        # make_gdf_from_df logs and returns None on failure; do not clip then.
        if gdf is not None:
            # TODO: Add filter by country (also in config)
            clipped_gdf = get_clipped_gdf(gdf, bbox_filter)

            print(clipped_gdf.head())

    # TODO: Write to file; Parquet by default. Allow user to convert to other formats (e.g., via ogr2ogr).
2 changes: 1 addition & 1 deletion overturemapsdownloader/schemas
Submodule schemas updated 298 files
Loading

0 comments on commit 124aabc

Please sign in to comment.