Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add streaming match generation #39

Merged
merged 1 commit into from
Feb 13, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
153 changes: 72 additions & 81 deletions grandiso/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,10 @@
"""

from typing import Dict, Generator, Hashable, List, Optional, Union, Tuple
from inspect import isclass
import itertools
from functools import lru_cache

import networkx as nx
from .queues import SimpleQueue

__version__ = "2.1.1"

Expand Down Expand Up @@ -162,12 +160,11 @@ def get_next_backbone_candidates(
)
# Let's return ALL possible node choices for this next_node. To do this
# without being an insane person, let's filter on max degree in host:
return [
{next_node: n}
for n in host.nodes
if is_node_attr_match(next_node, n, motif, host)
and is_node_structural_match(next_node, n, motif, host)
]
for n in host.nodes:
if is_node_attr_match(next_node, n, motif, host) \
and is_node_structural_match(next_node, n, motif, host):
yield {next_node: n}
return

else:
_nodes_with_greatest_backbone_count: List[str] = []
Expand Down Expand Up @@ -282,13 +279,12 @@ def get_next_backbone_candidates(
+ "empty backbone to this function?)"
)

tentative_results = [
{**backbone, next_node: c}
for c in candidate_nodes
if c not in backbone.values()
and is_node_attr_match(next_node, c, motif, host)
and is_node_structural_match(next_node, c, motif, host)
]
def tentative_results():
for c in candidate_nodes:
if c not in backbone.values() \
and is_node_attr_match(next_node, c, motif, host) \
and is_node_structural_match(next_node, c, motif, host):
yield {**backbone, next_node: c}

# One last filtering step here. This is to catch the cases where you have
# successfully mapped each node, and the final node has some valid
Expand All @@ -298,49 +294,49 @@ def get_next_backbone_candidates(
# in find_motifs that len(motif) == len(mapping), we will discover that the
# mapping is "complete" even though we haven't yet checked it at all.

monomorphism_candidates = []

for mapping in tentative_results:
if len(mapping) == len(motif):
if all(
[
host.has_edge(mapping[motif_u], mapping[motif_v])
and is_edge_attr_match(
(motif_u, motif_v),
(mapping[motif_u], mapping[motif_v]),
motif,
host,
)
for motif_u, motif_v in motif.edges
]
):
# This is a "complete" match!
monomorphism_candidates.append(mapping)
else:
# This is a partial match, so we'll continue building.
monomorphism_candidates.append(mapping)
def monomorphism_candidates():
for mapping in tentative_results():
if len(mapping) == len(motif):
if all(
[
host.has_edge(mapping[motif_u], mapping[motif_v])
and is_edge_attr_match(
(motif_u, motif_v),
(mapping[motif_u], mapping[motif_v]),
motif,
host,
)
for motif_u, motif_v in motif.edges
]
):
# This is a "complete" match!
yield mapping
else:
# This is a partial match, so we'll continue building.
yield mapping

if not isomorphisms_only:
return monomorphism_candidates
yield from monomorphism_candidates()
return

# Additionally, if isomorphisms_only == True, we can use this opportunity
# to confirm that no spurious edges exist in the induced subgraph:
isomorphism_candidates = []
for result in monomorphism_candidates:
for (motif_u, motif_v) in itertools.product(result.keys(), result.keys()):
# if the motif has this edge, then it doesn't rule any of the
# above results out as an isomorphism.
# if the motif does NOT have the edge, then NO RESULT may have
# the equivalent edge in the host graph:
if not motif.has_edge(motif_u, motif_v) and host.has_edge(
result[motif_u], result[motif_v]
):
# this is a violation.
break
else:
isomorphism_candidates.append(result)
return isomorphism_candidates
def isomorphism_candidates():
Copy link
Member

@j6k4m8 j6k4m8 Feb 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for the quiet; I'm doing a little benchmarking right now on the performance implications of calling a nested function vs. unwrapping these!

[edit] (i.e. I totally agree that using generators instead of list comprehensions is a huge improvement if you abort early, but I'm less sure about the case where you still run to completion)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good. I tried to make the change as minimally invasive as possible but I agree that having them as separate functions would be cleaner.

Let me know if you come across anything that seems slower. In the worst case scenario, it goes through all the data and discovers there are no matches (i.e. a full scan).

I also think the grand project is a pretty cool idea, putting a networkx interface on different backends such as a SQL database. I could see where indexes could make full scan operations much faster.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@davidmezzetti I got about +10μs benchmarking locally with ASV, which I think it's fair to say is in the noise :) Nice work!

I'd LOVE to have better indexing in Grand; I've been aiming for feature-completeness first and then adding in "shortcuts" and optimizations; would love to chat about some gameplans if you're interested!

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds good to me. I think having a networkx dialect applied towards something like a SQLite backend with indexes could be a powerful in-memory graph solution.

For example, something like an attribute search could return immediately whether or not there was matching data (i.e. no full scans). In the case of SQLite, you'd get the native code and smart memory management for free, but still have the ease of use of networkx.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

COMPLETELY agree, would love to have a true high-speed SQLite implementation!

for result in monomorphism_candidates():
for (motif_u, motif_v) in itertools.product(result.keys(), result.keys()):
# if the motif has this edge, then it doesn't rule any of the
# above results out as an isomorphism.
# if the motif does NOT have the edge, then NO RESULT may have
# the equivalent edge in the host graph:
if not motif.has_edge(motif_u, motif_v) and host.has_edge(
result[motif_u], result[motif_v]
):
# this is a violation.
break
else:
yield result

yield from isomorphism_candidates()

def uniform_node_interestingness(motif: nx.Graph) -> dict:
"""
Expand All @@ -358,7 +354,6 @@ def find_motifs_iter(
host: nx.Graph,
interestingness: dict = None,
directed: bool = None,
queue_=SimpleQueue,
isomorphisms_only: bool = False,
hints: List[Dict[Hashable, Hashable]] = None,
is_node_structural_match=_is_node_structural_match,
Expand All @@ -381,7 +376,6 @@ def find_motifs_iter(
number that indicates an ordinality in which to address each node
directed (bool: None): Whether direction should be considered during
search. If omitted, this will be based upon the motif directedness.
queue_ (queue.SimpleQueue): What kind of queue to use.
hints (dict): A dictionary of initial starting mappings. By default,
searches for all instances. You can constrain a node by passing a
list with a single dict item: `[{motifId: hostId}]`.
Expand All @@ -401,35 +395,32 @@ def find_motifs_iter(
else:
directed = False

q = queue_() if isclass(queue_) else queue_

# Kick off the queue with an empty candidate:
if hints is None or hints == []:
q.put({})
else:
for hint in hints:
q.put(hint)

while not q.empty():
new_backbone = q.get()
next_candidate_backbones = get_next_backbone_candidates(
new_backbone,
motif,
host,
interestingness,
directed=directed,
isomorphisms_only=isomorphisms_only,
is_node_structural_match=is_node_structural_match,
is_node_attr_match=is_node_attr_match,
is_edge_attr_match=is_edge_attr_match,
)
# List of starting paths, defaults to searching all instances if hints is empty
paths = hints if hints else [{}]

for candidate in next_candidate_backbones:
if len(candidate) == len(motif):
yield candidate
else:
q.put(candidate)
# Graph path traversal function
def walk(path):
if path and len(path) == len(motif):
# Path complete
yield path
else:
# Iterate over path candidates
for candidate in get_next_backbone_candidates(
path,
motif,
host,
interestingness,
directed=directed,
isomorphisms_only=isomorphisms_only,
is_node_structural_match=is_node_structural_match,
is_node_attr_match=is_node_attr_match,
is_edge_attr_match=is_edge_attr_match,
):
yield from walk(candidate)

# Traverse graph and yield mappings
for path in paths:
yield from walk(path)

def find_motifs(
motif: nx.Graph,
Expand Down
Loading