lint graph/_*.py #618

Merged: 6 commits, Oct 31, 2023
10 changes: 5 additions & 5 deletions libpysal/graph/_contiguity.py
@@ -1,12 +1,12 @@
 from collections import defaultdict
 
+import geopandas
 import numpy
-import shapely
 import pandas
-import geopandas
+import shapely
 from packaging.version import Version
 
-from ._utils import _neighbor_dict_to_edges, _validate_geometry_input, _resolve_islands
+from ._utils import _neighbor_dict_to_edges, _resolve_islands, _validate_geometry_input
 
 GPD_013 = Version(geopandas.__version__) >= Version("0.13")
 
@@ -43,9 +43,9 @@ def _vertex_set_intersection(geoms, rook=True, ids=None, by_perimeter=False):
     )
 
     # initialise the target map
-    graph = dict()
+    graph = {}
     for idx in ids:
-        graph[idx] = set([idx])
+        graph[idx] = {idx}
 
     # get all of the vertices for the input
     vertices, offsets = shapely.get_coordinates(geoms.geometry, return_index=True)
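Note for context: `_vertex_set_intersection` derives contiguity by intersecting per-geometry vertex sets, seeding each entry with itself exactly as the `graph[idx] = {idx}` line above does. A minimal standalone sketch of the queen-contiguity case (a hypothetical helper, not the library function; the `rook` and `by_perimeter` options are ignored):

```python
from collections import defaultdict

import shapely


def queen_neighbors(geoms):
    """Map each geometry index to the indices of geometries sharing a vertex."""
    # every vertex, plus the index of the geometry it came from
    vertices, offsets = shapely.get_coordinates(geoms, return_index=True)

    # invert: vertex coordinate -> set of geometry indices touching it
    touching = defaultdict(set)
    for (x, y), idx in zip(vertices, offsets):
        touching[(x, y)].add(idx)

    # seed with self so isolated geometries still get an entry
    graph = {idx: {idx} for idx in range(len(geoms))}
    for members in touching.values():
        for idx in members:
            graph[idx] |= members
    # drop the self-loop for a conventional neighbor listing
    return {idx: neighbors - {idx} for idx, neighbors in graph.items()}


squares = [
    shapely.box(0, 0, 1, 1),
    shapely.box(1, 0, 2, 1),  # shares two corners with the first square
    shapely.box(3, 3, 4, 4),  # isolated
]
print(queen_neighbors(squares))  # {0: {1}, 1: {0}, 2: set()}
```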
109 changes: 57 additions & 52 deletions libpysal/graph/_kernel.py
@@ -1,17 +1,16 @@
 import numpy
-from scipy import sparse, optimize, spatial, stats
+from scipy import optimize, sparse, spatial, stats
 
 from ._utils import (
-    _validate_geometry_input,
     _build_coincidence_lookup,
     _induce_cliques,
     _jitter_geoms,
-    _sparse_to_arrays,
     _resolve_islands,
+    _sparse_to_arrays,
+    _validate_geometry_input,
 )
 
 try:
-    from sklearn import neighbors, metrics
+    from sklearn import metrics, neighbors
 
     HAS_SKLEARN = True
 except ImportError:
@@ -50,7 +49,7 @@
     return r
 
 
-def _identity(distances, bandwidth):
+def _identity(distances, _):
     return distances
 
 
@@ -134,24 +133,25 @@
     if ids is None:
         ids = numpy.arange(coordinates.shape[0])
 
-    if metric == "haversine":
-        if not (
+    if (
+        metric == "haversine"
+        and not (
             (coordinates[:, 0] > -180)
             & (coordinates[:, 0] < 180)
             & (coordinates[:, 1] > -90)
             & (coordinates[:, 1] < 90)
-        ).all():
-            raise ValueError(
-                "'haversine' metric is limited to the range of "
-                "latitude coordinates (-90, 90) and the range of "
-                "longitude coordinates (-180, 180)."
-            )
+        ).all()
+    ):
+        raise ValueError(
+            "'haversine' metric is limited to the range of latitude coordinates "
+            "(-90, 90) and the range of longitude coordinates (-180, 180)."
+        )
 
     if k is not None:
         if metric != "precomputed":
-            D = _knn(coordinates, k=k, metric=metric, p=p, coincident=coincident)
+            d = _knn(coordinates, k=k, metric=metric, p=p, coincident=coincident)
         else:
-            D = coordinates * (coordinates.argsort(axis=1, kind="stable") < (k + 1))
+            d = coordinates * (coordinates.argsort(axis=1, kind="stable") < (k + 1))

[Codecov: added line libpysal/graph/_kernel.py#L154 was not covered by tests]
     else:
         if metric != "precomputed":
             dist_kwds = {}
@@ -167,8 +167,8 @@
f"metric {metric} is not supported by scipy, and scikit-learn "
"could not be imported."
)
D = spatial.distance.pdist(coordinates, metric=metric, **dist_kwds)
sq = spatial.distance.squareform(D)
d = spatial.distance.pdist(coordinates, metric=metric, **dist_kwds)
sq = spatial.distance.squareform(d)

# ensure that self-distance is dropped but 0 between co-located pts not
# get data and ids for sparse constructor
@@ -180,31 +180,31 @@
             i = numpy.delete(i, numpy.arange(0, i.size, sq.shape[0] + 1))
             j = numpy.delete(j, numpy.arange(0, j.size, sq.shape[0] + 1))
             # construct sparse
-            D = sparse.csc_array((data, (i, j)))
+            d = sparse.csc_array((data, (i, j)))
         else:
-            D = sparse.csc_array(coordinates)
+            d = sparse.csc_array(coordinates)
     if bandwidth is None:
-        bandwidth = numpy.percentile(D.data, 25)
+        bandwidth = numpy.percentile(d.data, 25)
     elif bandwidth == "auto":
         if (kernel == "identity") or (kernel is None):
             bandwidth = numpy.nan  # ignored by identity
         else:
-            bandwidth = _optimize_bandwidth(D, kernel)
+            bandwidth = _optimize_bandwidth(d, kernel)
 
[Codecov: added line libpysal/graph/_kernel.py#L192 was not covered by tests]
     if callable(kernel):
-        D.data = kernel(D.data, bandwidth)
+        d.data = kernel(d.data, bandwidth)
     else:
-        D.data = _kernel_functions[kernel](D.data, bandwidth)
+        d.data = _kernel_functions[kernel](d.data, bandwidth)
 
     if taper:
-        D.eliminate_zeros()
+        d.eliminate_zeros()
 
-    heads, tails, weights = _sparse_to_arrays(D, ids=ids)
+    heads, tails, weights = _sparse_to_arrays(d, ids=ids)
 
     return _resolve_islands(heads, tails, ids, weights)
 

 def _knn(coordinates, metric="euclidean", k=1, p=2, coincident="raise"):
-    """internal function called only from within _kernel, never directly to build KNN"""
+    """internal function called only within _kernel, never directly to build KNN"""
     coordinates, ids, geoms = _validate_geometry_input(
         coordinates, ids=None, valid_geometry_types=_VALID_GEOMETRY_TYPES
     )
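A note on the callable path renamed above (`d.data = kernel(d.data, bandwidth)`): any user-supplied kernel must accept `(distances, bandwidth)`, which is also why `_identity` now names its unused second parameter `_`. A minimal sketch of a conforming custom kernel (the `triangular` function is illustrative, not part of this PR):

```python
import numpy


def triangular(distances, bandwidth):
    """Linear decay: weight 1 at distance 0, falling to 0 at the bandwidth."""
    u = numpy.clip(distances / bandwidth, 0, 1)
    return 1 - u


# the same shape of call the library makes on the sparse matrix's .data
distances = numpy.array([0.0, 0.5, 1.0, 2.0])
print(triangular(distances, bandwidth=1.0))  # [1.  0.5 0.  0. ]
```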
@@ -217,29 +217,29 @@
         # sklearn haversine works with (lat,lng) in radians...
         coordinates = numpy.fliplr(numpy.deg2rad(coordinates))
     query = _prepare_tree_query(coordinates, metric, p=p)
-    D_linear, ixs = query(coordinates, k=k + 1)
+    d_linear, ixs = query(coordinates, k=k + 1)
     self_ix, neighbor_ix = ixs[:, 0], ixs[:, 1:]
-    D_linear = D_linear[:, 1:]
+    d_linear = d_linear[:, 1:]
     self_ix_flat = numpy.repeat(self_ix, k)
     neighbor_ix_flat = neighbor_ix.flatten()
-    D_linear_flat = D_linear.flatten()
+    d_linear_flat = d_linear.flatten()
     if metric == "haversine":
-        D_linear_flat * 6371  # express haversine distances in kilometers
-    D = sparse.csr_array(
-        (D_linear_flat, (self_ix_flat, neighbor_ix_flat)),
+        d_linear_flat * 6371  # express haversine distances in kilometers
+    d = sparse.csr_array(
+        (d_linear_flat, (self_ix_flat, neighbor_ix_flat)),
         shape=(n_samples, n_samples),
     )
-    return D
+    return d

     else:
         if coincident == "raise":
             raise ValueError(
-                f"There are {len(coincident_lut)} "
-                f"unique locations in the dataset, but {len(coordinates)} observations. "
-                f"At least one of these sites has {max_at_one_site} points, more than the "
-                f"{k} nearest neighbors requested. This means there are more than {k} points "
-                "in the same location, which makes this graph type undefined. To address "
-                "this issue, consider setting `coincident='clique' or consult the "
+                f"There are {len(coincident_lut)} unique locations in the dataset, "
+                f"but {len(coordinates)} observations. At least one of these sites "
+                f"has {max_at_one_site} points, more than the {k} nearest neighbors "
+                f"requested. This means there are more than {k} points in the same "
+                "location, which makes this graph type undefined. To address "
+                "this issue, consider setting `coincident='clique'` or consult the "
                 "documentation about coincident points."
             )
         if coincident == "jitter":
@@ -258,7 +258,13 @@
             )
         # # implicit coincident == "clique"
         # heads, tails, weights = _sparse_to_arrays(
-        #     _knn(coincident_lut.geometry, metric=metric, k=k, p=p, coincident="raise")
+        #     _knn(
+        #         coincident_lut.geometry,
+        #         metric=metric,
+        #         k=k,
+        #         p=p,
+        #         coincident="raise"
+        #     )
         # )
         # adjtable = pandas.DataFrame.from_dict(
         #     dict(focal=heads, neighbor=tails, weight=weights)
@@ -300,9 +306,7 @@
         return tree(coordinates, metric=metric, **dist_kwds).query
     else:
         if metric in ("euclidean", "manhattan", "cityblock", "minkowski"):
-            from scipy.spatial import KDTree as tree
-
-            tree_ = tree(coordinates)
+            tree_ = spatial.KDTree(coordinates)
             p = {"euclidean": 2, "manhattan": 1, "cityblock": 1, "minkowski": p}[metric]
 
             def query(target, k):
@@ -316,7 +320,7 @@
            )
 
 
-def _optimize_bandwidth(D, kernel):
+def _optimize_bandwidth(d, kernel):
     """
     Optimize the bandwidth as a function of entropy for a given kernel function.
 
@@ -326,16 +330,17 @@
"moderate" level of smoothing.
"""
kernel_function = _kernel_functions.get(kernel, kernel)
assert callable(
kernel_function
), f"kernel {kernel} was not in supported kernel types {_kernel_functions.keys()} or callable"
assert callable(kernel_function), (

Check warning on line 333 in libpysal/graph/_kernel.py

View check run for this annotation

Codecov / codecov/patch

libpysal/graph/_kernel.py#L333

Added line #L333 was not covered by tests
f"kernel {kernel} was not in supported kernel types "
f"{_kernel_functions.keys()} or callable"
)

def _loss(bandwidth, D=D, kernel_function=kernel_function):
Ku = kernel_function(D.data, bandwidth)
bins, _ = numpy.histogram(Ku, bins=int(D.shape[0] ** 0.5), range=(0, 1))
def _loss(bandwidth, d=d, kernel_function=kernel_function):
k_u = kernel_function(d.data, bandwidth)
bins, _ = numpy.histogram(k_u, bins=int(d.shape[0] ** 0.5), range=(0, 1))

Check warning on line 340 in libpysal/graph/_kernel.py

View check run for this annotation

Codecov / codecov/patch

libpysal/graph/_kernel.py#L338-L340

Added lines #L338 - L340 were not covered by tests
return -stats.entropy(bins / bins.sum())

xopt = optimize.minimize_scalar(
_loss, bounds=(0, D.data.max() * 2), method="bounded"
_loss, bounds=(0, d.data.max() * 2), method="bounded"
)
return xopt.x
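For context on `_loss` above: it histograms the kernel weights and rewards high entropy, so the selected bandwidth avoids degenerate smoothing where all weights pile up near 0 or near 1. A self-contained sketch of the same recipe, using a made-up Gaussian kernel and random distances (illustrative only, not the library API):

```python
import numpy
from scipy import optimize, stats


def gaussian(distances, bandwidth):
    """Toy Gaussian kernel for the demonstration."""
    return numpy.exp(-((distances / bandwidth) ** 2) / 2)


rng = numpy.random.default_rng(0)
distances = rng.uniform(0, 10, size=200)  # stand-in for d.data


def loss(bandwidth):
    # histogram the kernel weights and reward a spread-out distribution
    weights = gaussian(distances, bandwidth)
    bins, _ = numpy.histogram(weights, bins=int(distances.size**0.5), range=(0, 1))
    return -stats.entropy(bins / bins.sum())


xopt = optimize.minimize_scalar(loss, bounds=(0, distances.max() * 2), method="bounded")
print(xopt.x)  # the bandwidth giving maximum-entropy weights
```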
17 changes: 9 additions & 8 deletions libpysal/graph/_parquet.py
@@ -1,8 +1,9 @@
-import libpysal
 import json
+
+import libpysal
 
 
-def _to_parquet(G, destination, **kwargs):
+def _to_parquet(graph_obj, destination, **kwargs):
     """Save adjacency as a Parquet table and add custom metadata
 
     Metadata contain transformation and the libpysal version used to save the file.
@@ -11,7 +12,7 @@

     Parameters
     ----------
-    G : Graph
+    graph_obj : Graph
         Graph to be saved
     destination : str | pyarrow.NativeFile
         path or any stream supported by pyarrow
@@ -22,11 +23,11 @@
         import pyarrow as pa
         import pyarrow.parquet as pq
     except (ImportError, ModuleNotFoundError):
-        raise ImportError("pyarrow is required for `to_parquet`.")
-    table = pa.Table.from_pandas(G._adjacency.to_frame())
+        raise ImportError("pyarrow is required for `to_parquet`.") from None
+    table = pa.Table.from_pandas(graph_obj._adjacency.to_frame())
 
[Codecov: added line libpysal/graph/_parquet.py#L26 was not covered by tests]
 
     meta = table.schema.metadata
-    d = {"transformation": G.transformation, "version": libpysal.__version__}
+    d = {"transformation": graph_obj.transformation, "version": libpysal.__version__}
     meta[b"libpysal"] = json.dumps(d).encode("utf-8")
     schema = table.schema.with_metadata(meta)
 
@@ -51,10 +52,10 @@
     try:
         import pyarrow.parquet as pq
     except (ImportError, ModuleNotFoundError):
-        raise ImportError("pyarrow is required for `read_parquet`.")
+        raise ImportError("pyarrow is required for `read_parquet`.") from None
 
[Codecov: added line libpysal/graph/_parquet.py#L55 was not covered by tests]
 
     table = pq.read_table(source, **kwargs)
-    if b"libpysal" in table.schema.metadata.keys():
+    if b"libpysal" in table.schema.metadata:
         meta = json.loads(table.schema.metadata[b"libpysal"])
         transformation = meta["transformation"]
     else:
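Two notes on this file: `raise ... from None` suppresses the chained "During handling of the above exception..." traceback from the original ImportError, and the metadata round-trip performed by `_to_parquet`/`read_parquet` can be reproduced with plain pyarrow. A hedged sketch (the file name, version string, transformation value, and sample frame are all illustrative):

```python
import json

import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

adjacency = pd.DataFrame({"focal": [0, 1], "neighbor": [1, 0], "weight": [1.0, 1.0]})
table = pa.Table.from_pandas(adjacency)

# schema.metadata can be None on a fresh table, so start from an empty dict
meta = dict(table.schema.metadata or {})
meta[b"libpysal"] = json.dumps({"transformation": "O", "version": "4.9.2"}).encode("utf-8")
pq.write_table(table.replace_schema_metadata(meta), "adjacency.parquet")

# read back and recover the custom key next to pyarrow's own pandas metadata
restored = pq.read_table("adjacency.parquet")
if b"libpysal" in restored.schema.metadata:
    print(json.loads(restored.schema.metadata[b"libpysal"]))
```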
19 changes: 10 additions & 9 deletions libpysal/graph/_plotting.py
@@ -4,7 +4,7 @@


 def _plot(
-    G,
+    graph_obj,
     gdf,
     focal=None,
     nodes=True,
@@ -23,7 +23,7 @@ def _plot(

     Parameters
     ----------
-    G : Graph
+    graph_obj : Graph
         Graph to be plotted
     gdf : geopandas.GeoDataFrame
         Geometries indexed using the same index as Graph. Geometry types other than
@@ -80,27 +80,28 @@
if "color" not in node_kws:
node_kws["color"] = color
else:
node_kws = dict(color=color)
node_kws = {"color": color}

if edge_kws is not None:
if "color" not in edge_kws:
edge_kws["color"] = color
else:
edge_kws = dict(color=color)
edge_kws = {"color": color}

# get array of coordinates in the order reflecting G._adjacency.index.codes
# get array of coordinates in the order reflecting graph_obj._adjacency.index.codes
# we need to work on int position to allow fast filtering of duplicated edges and
# cannot rely on gdf remaining in the same order between Graph creation and plotting
coords = shapely.get_coordinates(gdf.reindex(G.unique_ids).centroid)
# cannot rely on gdf remaining in the same order between Graph creation and
# plotting
coords = shapely.get_coordinates(gdf.reindex(graph_obj.unique_ids).centroid)

if focal is not None:
if not pd.api.types.is_list_like(focal):
focal = [focal]
subset = G._adjacency[focal]
subset = graph_obj._adjacency[focal]
codes = subset.index.codes

else:
codes = G._adjacency.index.codes
codes = graph_obj._adjacency.index.codes

# avoid plotting both ij and ji
edges = np.unique(np.sort(np.column_stack([codes]).T, axis=1), axis=0)
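A design note on the unchanged final line: the adjacency stores both ij and ji, so sorting within each edge pair and then deduplicating rows keeps exactly one undirected edge per pair. A minimal demonstration of the idiom:

```python
import numpy as np

# focal/neighbor codes as integer positions; each undirected edge appears twice
edges = np.array([[0, 1], [1, 0], [1, 2], [2, 1]])

# sort within each row so (1, 0) becomes (0, 1), then drop duplicate rows
unique_edges = np.unique(np.sort(edges, axis=1), axis=0)
print(unique_edges)  # [[0 1]
                     #  [1 2]]
```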