Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

lint graph/_*.py #618

Merged
merged 6 commits into from
Oct 31, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions libpysal/graph/_contiguity.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from collections import defaultdict

import geopandas
import numpy
import shapely
import pandas
import geopandas
import shapely
from packaging.version import Version

from ._utils import _neighbor_dict_to_edges, _validate_geometry_input, _resolve_islands
from ._utils import _neighbor_dict_to_edges, _resolve_islands, _validate_geometry_input

GPD_013 = Version(geopandas.__version__) >= Version("0.13")

Expand Down Expand Up @@ -43,9 +43,9 @@ def _vertex_set_intersection(geoms, rook=True, ids=None, by_perimeter=False):
)

# initialise the target map
graph = dict()
graph = {}
for idx in ids:
graph[idx] = set([idx])
graph[idx] = {idx}

# get all of the vertices for the input
vertices, offsets = shapely.get_coordinates(geoms.geometry, return_index=True)
Expand Down
107 changes: 57 additions & 50 deletions libpysal/graph/_kernel.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
import numpy
from scipy import sparse, optimize, spatial, stats
from scipy import optimize, sparse, spatial, stats

from ._utils import (
_validate_geometry_input,
_build_coincidence_lookup,
_induce_cliques,
_jitter_geoms,
_sparse_to_arrays,
_resolve_islands,
_sparse_to_arrays,
_validate_geometry_input,
)

try:
from sklearn import neighbors, metrics
from sklearn import metrics, neighbors

HAS_SKLEARN = True
except ImportError:
Expand Down Expand Up @@ -50,7 +49,7 @@
return r


def _identity(distances, bandwidth):
def _identity(distances, _):
return distances


Expand Down Expand Up @@ -134,24 +133,25 @@
if ids is None:
ids = numpy.arange(coordinates.shape[0])

if metric == "haversine":
if not (
if (
metric == "haversine"
and not (
(coordinates[:, 0] > -180)
& (coordinates[:, 0] < 180)
& (coordinates[:, 1] > -90)
& (coordinates[:, 1] < 90)
).all():
raise ValueError(
"'haversine' metric is limited to the range of "
"latitude coordinates (-90, 90) and the range of "
"longitude coordinates (-180, 180)."
)
).all()
):
raise ValueError(
"'haversine' metric is limited to the range of latitude coordinates "
"(-90, 90) and the range of longitude coordinates (-180, 180)."
)

if k is not None:
if metric != "precomputed":
D = _knn(coordinates, k=k, metric=metric, p=p, coincident=coincident)
d = _knn(coordinates, k=k, metric=metric, p=p, coincident=coincident)
else:
D = coordinates * (coordinates.argsort(axis=1, kind="stable") < (k + 1))
d = coordinates * (coordinates.argsort(axis=1, kind="stable") < (k + 1))

Check warning on line 154 in libpysal/graph/_kernel.py

View check run for this annotation

Codecov / codecov/patch

libpysal/graph/_kernel.py#L154

Added line #L154 was not covered by tests
else:
if metric != "precomputed":
dist_kwds = {}
Expand All @@ -167,8 +167,8 @@
f"metric {metric} is not supported by scipy, and scikit-learn "
"could not be imported."
)
D = spatial.distance.pdist(coordinates, metric=metric, **dist_kwds)
sq = spatial.distance.squareform(D)
d = spatial.distance.pdist(coordinates, metric=metric, **dist_kwds)
sq = spatial.distance.squareform(d)

# ensure that self-distance is dropped but 0 distance between co-located pts is kept
# get data and ids for sparse constructor
Expand All @@ -180,31 +180,31 @@
i = numpy.delete(i, numpy.arange(0, i.size, sq.shape[0] + 1))
j = numpy.delete(j, numpy.arange(0, j.size, sq.shape[0] + 1))
# construct sparse
D = sparse.csc_array((data, (i, j)))
d = sparse.csc_array((data, (i, j)))
else:
D = sparse.csc_array(coordinates)
d = sparse.csc_array(coordinates)
if bandwidth is None:
bandwidth = numpy.percentile(D.data, 25)
bandwidth = numpy.percentile(d.data, 25)
elif bandwidth == "auto":
if (kernel == "identity") or (kernel is None):
bandwidth = numpy.nan # ignored by identity
else:
bandwidth = _optimize_bandwidth(D, kernel)
bandwidth = _optimize_bandwidth(d, kernel)

Check warning on line 192 in libpysal/graph/_kernel.py

View check run for this annotation

Codecov / codecov/patch

libpysal/graph/_kernel.py#L192

Added line #L192 was not covered by tests
if callable(kernel):
D.data = kernel(D.data, bandwidth)
d.data = kernel(d.data, bandwidth)
else:
D.data = _kernel_functions[kernel](D.data, bandwidth)
d.data = _kernel_functions[kernel](d.data, bandwidth)

if taper:
D.eliminate_zeros()
d.eliminate_zeros()

heads, tails, weights = _sparse_to_arrays(D, ids=ids)
heads, tails, weights = _sparse_to_arrays(d, ids=ids)

return _resolve_islands(heads, tails, ids, weights)


def _knn(coordinates, metric="euclidean", k=1, p=2, coincident="raise"):
"""internal function called only from within _kernel, never directly to build KNN"""
"""internal function called only within _kernel, never directly to build KNN"""
coordinates, ids, geoms = _validate_geometry_input(
coordinates, ids=None, valid_geometry_types=_VALID_GEOMETRY_TYPES
)
Expand All @@ -217,29 +217,29 @@
# sklearn haversine works with (lat,lng) in radians...
coordinates = numpy.fliplr(numpy.deg2rad(coordinates))
query = _prepare_tree_query(coordinates, metric, p=p)
D_linear, ixs = query(coordinates, k=k + 1)
d_linear, ixs = query(coordinates, k=k + 1)
self_ix, neighbor_ix = ixs[:, 0], ixs[:, 1:]
D_linear = D_linear[:, 1:]
d_linear = d_linear[:, 1:]
self_ix_flat = numpy.repeat(self_ix, k)
neighbor_ix_flat = neighbor_ix.flatten()
D_linear_flat = D_linear.flatten()
d_linear_flat = d_linear.flatten()
if metric == "haversine":
D_linear_flat * 6371 # express haversine distances in kilometers
D = sparse.csr_array(
(D_linear_flat, (self_ix_flat, neighbor_ix_flat)),
d_linear_flat * 6371 # express haversine distances in kilometers
d = sparse.csr_array(
(d_linear_flat, (self_ix_flat, neighbor_ix_flat)),
shape=(n_samples, n_samples),
)
return D
return d

else:
if coincident == "raise":
raise ValueError(
f"There are {len(coincident_lut)} "
f"unique locations in the dataset, but {len(coordinates)} observations. "
f"At least one of these sites has {max_at_one_site} points, more than the "
f"{k} nearest neighbors requested. This means there are more than {k} points "
"in the same location, which makes this graph type undefined. To address "
"this issue, consider setting `coincident='clique' or consult the "
f"There are {len(coincident_lut)} unique locations in the dataset, "
f"but {len(coordinates)} observations. At least one of these sites "
f"has {max_at_one_site} points, more than the {k} nearest neighbors "
f"requested. This means there are more than {k} points in the same "
"location, which makes this graph type undefined. To address "
"this issue, consider setting `coincident='clique'` or consult the "
"documentation about coincident points."
)
if coincident == "jitter":
Expand All @@ -258,7 +258,13 @@
)
# # implicit coincident == "clique"
# heads, tails, weights = _sparse_to_arrays(
# _knn(coincident_lut.geometry, metric=metric, k=k, p=p, coincident="raise")
# _knn(
# coincident_lut.geometry,
# metric=metric,
# k=k,
# p=p,
# coincident="raise"
# )
# )
# adjtable = pandas.DataFrame.from_dict(
# dict(focal=heads, neighbor=tails, weight=weights)
Expand Down Expand Up @@ -300,7 +306,7 @@
return tree(coordinates, metric=metric, **dist_kwds).query
else:
if metric in ("euclidean", "manhattan", "cityblock", "minkowski"):
from scipy.spatial import KDTree as tree
from scipy.spatial import KDTree as tree # noqa N813

tree_ = tree(coordinates)
jGaboardi marked this conversation as resolved.
Show resolved Hide resolved
p = {"euclidean": 2, "manhattan": 1, "cityblock": 1, "minkowski": p}[metric]
Expand All @@ -316,7 +322,7 @@
)


def _optimize_bandwidth(D, kernel):
def _optimize_bandwidth(d, kernel):
"""
Optimize the bandwidth as a function of entropy for a given kernel function.

Expand All @@ -326,16 +332,17 @@
"moderate" level of smoothing.
"""
kernel_function = _kernel_functions.get(kernel, kernel)
assert callable(
kernel_function
), f"kernel {kernel} was not in supported kernel types {_kernel_functions.keys()} or callable"
assert callable(kernel_function), (

Check warning on line 335 in libpysal/graph/_kernel.py

View check run for this annotation

Codecov / codecov/patch

libpysal/graph/_kernel.py#L335

Added line #L335 was not covered by tests
f"kernel {kernel} was not in supported kernel types "
f"{_kernel_functions.keys()} or callable"
)

def _loss(bandwidth, D=D, kernel_function=kernel_function):
Ku = kernel_function(D.data, bandwidth)
bins, _ = numpy.histogram(Ku, bins=int(D.shape[0] ** 0.5), range=(0, 1))
def _loss(bandwidth, d=d, kernel_function=kernel_function):
k_u = kernel_function(d.data, bandwidth)
bins, _ = numpy.histogram(k_u, bins=int(d.shape[0] ** 0.5), range=(0, 1))

Check warning on line 342 in libpysal/graph/_kernel.py

View check run for this annotation

Codecov / codecov/patch

libpysal/graph/_kernel.py#L340-L342

Added lines #L340 - L342 were not covered by tests
return -stats.entropy(bins / bins.sum())

xopt = optimize.minimize_scalar(
_loss, bounds=(0, D.data.max() * 2), method="bounded"
_loss, bounds=(0, d.data.max() * 2), method="bounded"
)
return xopt.x
11 changes: 6 additions & 5 deletions libpysal/graph/_parquet.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import libpysal
import json

import libpysal


def _to_parquet(G, destination, **kwargs):
def _to_parquet(G, destination, **kwargs): # noqa N803
martinfleis marked this conversation as resolved.
Show resolved Hide resolved
"""Save adjacency as a Parquet table and add custom metadata

Metadata contain transformation and the libpysal version used to save the file.
Expand All @@ -22,7 +23,7 @@
import pyarrow as pa
import pyarrow.parquet as pq
except (ImportError, ModuleNotFoundError):
raise ImportError("pyarrow is required for `to_parquet`.")
raise ImportError("pyarrow is required for `to_parquet`.") from None

Check warning on line 26 in libpysal/graph/_parquet.py

View check run for this annotation

Codecov / codecov/patch

libpysal/graph/_parquet.py#L26

Added line #L26 was not covered by tests
table = pa.Table.from_pandas(G._adjacency.to_frame())

meta = table.schema.metadata
Expand Down Expand Up @@ -51,10 +52,10 @@
try:
import pyarrow.parquet as pq
except (ImportError, ModuleNotFoundError):
raise ImportError("pyarrow is required for `read_parquet`.")
raise ImportError("pyarrow is required for `read_parquet`.") from None

Check warning on line 55 in libpysal/graph/_parquet.py

View check run for this annotation

Codecov / codecov/patch

libpysal/graph/_parquet.py#L55

Added line #L55 was not covered by tests

table = pq.read_table(source, **kwargs)
if b"libpysal" in table.schema.metadata.keys():
if b"libpysal" in table.schema.metadata:
meta = json.loads(table.schema.metadata[b"libpysal"])
transformation = meta["transformation"]
else:
Expand Down
12 changes: 6 additions & 6 deletions libpysal/graph/_plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


def _plot(
G,
G, # noqa N803
martinfleis marked this conversation as resolved.
Show resolved Hide resolved
gdf,
focal=None,
nodes=True,
Expand Down Expand Up @@ -80,17 +80,17 @@ def _plot(
if "color" not in node_kws:
node_kws["color"] = color
else:
node_kws = dict(color=color)
node_kws = {"color": color}

if edge_kws is not None:
if "color" not in edge_kws:
edge_kws["color"] = color
else:
edge_kws = dict(color=color)
edge_kws = {"color": color}

# get array of coordinates in the order reflecting G._adjacency.index.codes
# we need to work on int position to allow fast filtering of duplicated edges and
# cannot rely on gdf remaining in the same order between Graph creation and plotting
# get array of coordinates in the order reflecting G._adjacency.index.codes; we need
# to work on int position to allow fast filtering of duplicated edges and cannot
# rely on gdf remaining in the same order between Graph creation and plotting
coords = shapely.get_coordinates(gdf.reindex(G.unique_ids).centroid)

if focal is not None:
Expand Down
22 changes: 13 additions & 9 deletions libpysal/graph/_set_ops.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,11 @@
import numpy as np
import pandas

from packaging.version import Version

from ._utils import _resolve_islands


class _Set_Mixin:
class SetOpsMixin:
"""
This implements common useful set operations on weights and dunder methods.
"""
Expand Down Expand Up @@ -53,8 +52,8 @@ def __len__(self):

def intersects(self, right):
"""
Returns True if left and right share at least one link, irrespective of weights
value.
Returns True if left and right share at least one link,
irrespective of weights value.
"""
intersection = self._adjacency.index.drop(self.isolates).intersection(
right._adjacency.index.drop(right.isolates)
Expand Down Expand Up @@ -108,13 +107,15 @@ def symmetric_difference(self, right):

def union(self, right):
"""
Provide the union of two Graph objects, collecting all links that are in either graph.
Provide the union of two Graph objects,
collecting all links that are in either graph.
"""
from .base import Graph

if not (self.unique_ids == right.unique_ids).all():
raise ValueError(
"Cannot do union of Graphs that are based on different sets of unique IDs."
"Cannot do union of Graphs that are "
"based on different sets of unique IDs."
)

union = self._adjacency.index.drop(self.isolates).union(
Expand All @@ -131,8 +132,9 @@ def union(self, right):

def difference(self, right):
"""
Provide the set difference between the graph on the left and the graph on the right.
This returns all links in the left graph that are not in the right graph.
Provide the set difference between the graph on
the left and the graph on the right. This returns
all links in the left graph that are not in the right graph.
"""
from .base import Graph

Expand Down Expand Up @@ -193,7 +195,9 @@ def isomorphic(self, right):
try:
import networkx as nx
except ImportError:
raise ImportError("NetworkX is required to check for graph isomorphism")
raise ImportError(
"NetworkX is required to check for graph isomorphism"
) from None

nxleft = self.to_networkx()
nxright = right.to_networkx()
Expand Down
6 changes: 4 additions & 2 deletions libpysal/graph/_spatial_lag.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ def _lag_spatial(graph, y):
"""
sp = graph.sparse
if len(y) != sp.shape[0]:
raise ValueError("The length of `y` needs to match the number of observations "
f"in Graph. Expected {sp.shape[0]}, got {len(y)}.")
raise ValueError(
"The length of `y` needs to match the number of observations "
f"in Graph. Expected {sp.shape[0]}, got {len(y)}."
)
return graph.sparse @ y
Loading
Loading