Skip to content
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
a0363c0
Add crawler
samwaseda Feb 23, 2026
f9488a2
Add docstring
samwaseda Feb 23, 2026
c32f720
Remove function_id
samwaseda Feb 23, 2026
2bf31ea
Add tests
samwaseda Feb 23, 2026
9776b8a
black
samwaseda Feb 23, 2026
8f00057
Add more tests
samwaseda Feb 23, 2026
bcf2973
Remove the part with closure
samwaseda Feb 23, 2026
756207c
Update environemnt
samwaseda Feb 23, 2026
fbdb91b
[dependabot skip] Update env file
pyiron-runner Feb 23, 2026
1990a4d
ruff
samwaseda Feb 23, 2026
53162c0
Merge branch 'main' into crawler
liamhuber Feb 24, 2026
09cd047
Use google scoping style
liamhuber Feb 24, 2026
38aa637
Use object_scope
liamhuber Feb 24, 2026
ddccbdf
Add hints
liamhuber Feb 24, 2026
5ca15c3
Do it in a single recursive function
liamhuber Feb 24, 2026
cdf03d5
Add tests
liamhuber Feb 24, 2026
ac96a5e
Use google scoping style
liamhuber Feb 24, 2026
ad8f016
Merge branch 'crawler' into crawler-edits
samwaseda Feb 24, 2026
b1110ee
Get rid of what was before
samwaseda Feb 24, 2026
21eeb49
Merge pull request #156 from pyiron/crawler-edits
samwaseda Feb 24, 2026
b5e02da
Apparently I had failed to push the changes...
samwaseda Feb 24, 2026
1e8d56e
Merge branch 'crawler' of github.com:pyiron/flowrep into crawler
samwaseda Feb 24, 2026
f6ba6b6
[ruff] remove unused math
samwaseda Feb 24, 2026
dc369fd
Make key-function pairs and not key-list[function]
samwaseda Feb 24, 2026
b4cd1c3
black
samwaseda Feb 24, 2026
eac82eb
ruff and mypy
samwaseda Feb 24, 2026
44a361a
Merge branch 'main' into crawler
liamhuber Feb 24, 2026
90c5538
Remove unused test functions
liamhuber Feb 24, 2026
befd245
Update docstring
liamhuber Feb 24, 2026
06ec56d
Rename variable
liamhuber Feb 24, 2026
850ab4e
Update flowrep/crawler.py
samwaseda Feb 24, 2026
fd5ec56
Merge remote-tracking branch 'origin/crawler' into crawler
liamhuber Feb 24, 2026
ef89712
Transform continue guard to failure
liamhuber Feb 24, 2026
b880f18
Extend tests
liamhuber Feb 24, 2026
4aafbd3
Move and rename
liamhuber Feb 24, 2026
ad7af9c
Add comment
liamhuber Feb 24, 2026
d81e55e
Add test
liamhuber Feb 24, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 100 additions & 0 deletions flowrep/crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import ast
import types
from collections.abc import Callable

from pyiron_snippets import versions

from flowrep.models.parsers import object_scope, parser_helpers

# Maps the version identity of a dependency to the callable resolved for it.
CallDependencies = dict[versions.VersionInfo, Callable]


def get_call_dependencies(
    func: types.FunctionType,
    version_scraping: versions.VersionScrapingMap | None = None,
    _call_dependencies: CallDependencies | None = None,
    _visited: set[str] | None = None,
) -> CallDependencies:
    """
    Recursively collect all callable dependencies of *func* via AST introspection.

    Each dependency is keyed by its :class:`~pyiron_snippets.versions.VersionInfo`
    and maps to the callable that was resolved for that identity; if several call
    sites resolve to the same key, the most recently resolved callable is kept.
    The search is depth-first: for every resolved callee that is a
    :class:`~types.FunctionType` (i.e. has inspectable source), the function
    recurses into the callee's own scope.

    Call expressions whose symbol resolution raises :class:`ValueError` or
    :class:`TypeError`, and resolved objects that are not callable, are skipped.

    Args:
        func: The function whose call-graph to analyse.
        version_scraping (VersionScrapingMap | None): Since some modules may store
            their version in other ways, this provides an optional map between
            module names and callables to leverage for extracting that module's
            version.
        _call_dependencies: Accumulator for recursive calls — do not pass manually.
        _visited: Fully-qualified names already traversed — do not pass manually.

    Returns:
        A mapping from :class:`VersionInfo` to the callable found under that
        identity across the entire (sub-)tree.
    """
    # Compare against None explicitly: an empty accumulator is falsy, so `or`
    # would silently swap it for a fresh object instead of filling the one that
    # was handed in.
    call_dependencies: CallDependencies = (
        {} if _call_dependencies is None else _call_dependencies
    )
    visited: set[str] = set() if _visited is None else _visited

    # Guard against cycles (and repeated work) by traversing each function once.
    func_fqn = versions.VersionInfo.of(func).fully_qualified_name
    if func_fqn in visited:
        return call_dependencies
    visited.add(func_fqn)

    scope = object_scope.get_scope(func)
    tree = parser_helpers.get_ast_function_node(func)
    collector = CallCollector()
    collector.visit(tree)

    for call in collector.calls:
        try:
            callee = object_scope.resolve_symbol_to_object(call, scope)
        except (ValueError, TypeError):
            # The call expression could not be resolved to an object; skip it.
            continue

        if not callable(callee):
            continue

        info = versions.VersionInfo.of(callee, version_scraping=version_scraping)
        call_dependencies[info] = callee

        # Depth-first search on dependencies — only possible when we have source
        if isinstance(callee, types.FunctionType):
            get_call_dependencies(callee, version_scraping, call_dependencies, visited)

    return call_dependencies


def split_by_version_availability(
    call_dependencies: CallDependencies,
) -> tuple[CallDependencies, CallDependencies]:
    """
    Separate *call_dependencies* into entries with and without a version.

    An entry counts as unversioned only when the ``version`` attribute of its key
    is ``None``; every other value is treated as versioned.

    Args:
        call_dependencies: The dependency map to partition.

    Returns:
        A ``(has_version, no_version)`` tuple of :data:`CallDependencies` dicts.
    """
    has_version: CallDependencies = {}
    no_version: CallDependencies = {}
    for info, dependency in call_dependencies.items():
        # Pick the destination first, then store the entry unchanged.
        bucket = no_version if info.version is None else has_version
        bucket[info] = dependency

    return has_version, no_version


class CallCollector(ast.NodeVisitor):
    """
    AST visitor that records the callee expression of every call it visits.

    After ``visit`` has been run on a tree, ``calls`` holds the ``func`` node of
    each :class:`ast.Call`, with an outer call recorded before the calls nested
    inside it.
    """

    def __init__(self):
        # Callee expressions, in the order the calls were visited.
        self.calls: list[ast.expr] = []

    def visit_Call(self, node: ast.Call) -> None:
        """Record the callee of *node*, then descend into its children."""
        callee = node.func
        self.calls.append(callee)
        # Keep walking so calls nested in the callee or the arguments are found.
        self.generic_visit(node)
204 changes: 204 additions & 0 deletions tests/unit/test_crawler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
import unittest

from pyiron_snippets import versions

from flowrep import crawler

# ---------------------------------------------------------------------------
# Helper functions defined at module level so they have inspectable source,
# a proper __module__, and a stable __qualname__.
# ---------------------------------------------------------------------------


def _leaf():
    """Fixture: makes no calls and returns a constant."""
    return 42


def _single_call():
    """Fixture: makes exactly one call, to ``_leaf``."""
    return _leaf()


def _diamond_a():
    """Fixture: first arm of the diamond; calls ``_leaf``."""
    return _leaf()


def _diamond_b():
    """Fixture: second arm of the diamond; calls ``_leaf``."""
    return _leaf()


def _diamond_root():
    """Fixture: root of the diamond; calls both arms, which share ``_leaf``."""
    _diamond_a()
    _diamond_b()


def _mutual_b():
    """Fixture: calls ``_leaf``; is itself called by ``_mutual_a``."""
    # NOTE(review): not referenced by any test visible in this file.
    return _leaf()


def _mutual_a():
    """Fixture: calls ``_mutual_b``."""
    # NOTE(review): not referenced by any test visible in this file.
    return _mutual_b()


# Mutual recursion to exercise cycle detection.
def _cycle_a():
    """Fixture: first half of a mutually recursive pair; calls ``_cycle_b``."""
    return _cycle_b()  # noqa: F821 — defined below


def _cycle_b():
    """Fixture: second half of the mutually recursive pair; calls ``_cycle_a``."""
    return _cycle_a()


def _no_calls():
    """Fixture: contains an expression and an assignment but no call at all."""
    x = 1 + 2
    return x


def _calls_len():
    """Fixture: calls only the builtin ``len``."""
    return len([1, 2, 3])


def _nested_call():
    """Fixture: calls ``_single_call``, which in turn calls ``_leaf``."""
    return _single_call()


def _multi_call():
    """Fixture: calls ``_leaf`` twice from separate call sites."""
    # NOTE(review): not referenced by any test visible in this file.
    a = _leaf()
    b = _leaf()
    return a + b


def _fqn(func) -> str:
    """Return the fully qualified name that ``VersionInfo`` reports for *func*."""
    info = versions.VersionInfo.of(func)
    return info.fully_qualified_name


def _fqns(deps: crawler.CallDependencies) -> set[str]:
    """Return the set of fully qualified names of all keys in *deps*."""
    names: set[str] = set()
    for version_info in deps:
        names.add(version_info.fully_qualified_name)
    return names


class TestGetCallDependencies(unittest.TestCase):
    """Tests for :func:`crawler.get_call_dependencies`."""

    # --- basic behaviour ---

    def test_no_calls_returns_empty(self):
        """A function without any call expression yields an empty mapping."""
        self.assertEqual(crawler.get_call_dependencies(_no_calls), {})

    def test_single_direct_call(self):
        """A directly called function shows up among the dependencies."""
        found = _fqns(crawler.get_call_dependencies(_single_call))
        self.assertIn(_fqn(_leaf), found)

    def test_transitive_dependencies(self):
        """Both the direct callee and the callee's own callee are reported."""
        found = _fqns(crawler.get_call_dependencies(_nested_call))
        # Should find both _single_call and _leaf
        for expected in (_single_call, _leaf):
            self.assertIn(_fqn(expected), found)

    def test_diamond_dependency_no_duplicate_keys(self):
        """
        _diamond_root -> _diamond_a -> _leaf AND _diamond_root -> _diamond_b -> _leaf.
        _leaf's VersionInfo should appear exactly once as a key.
        """
        deps = crawler.get_call_dependencies(_diamond_root)
        leaf_name = _fqn(_leaf)
        occurrences = sum(
            1 for info in deps if info.fully_qualified_name == leaf_name
        )
        self.assertEqual(occurrences, 1)

    # --- cycle safety ---

    def test_cycle_does_not_recurse_infinitely(self):
        """Mutually recursive functions are crawled without a RecursionError."""
        # Should terminate without RecursionError
        found = _fqns(crawler.get_call_dependencies(_cycle_a))
        self.assertIn(_fqn(_cycle_b), found)

    # --- builtins / non-FunctionType callables ---

    def test_builtin_callable_included(self):
        """A builtin callable is reported even though it has no Python source."""
        found = _fqns(crawler.get_call_dependencies(_calls_len))
        self.assertIn(_fqn(len), found)

    def test_returns_dict_type(self):
        """The result is a plain ``dict``."""
        self.assertIsInstance(crawler.get_call_dependencies(_leaf), dict)


class TestSplitByVersionAvailability(unittest.TestCase):
    """Tests for :func:`crawler.split_by_version_availability`."""

    @staticmethod
    def _make_info(
        module: str, qualname: str, version: str | None = None
    ) -> versions.VersionInfo:
        """Build a ``VersionInfo`` key directly from its three fields."""
        fields = {"module": module, "qualname": qualname, "version": version}
        return versions.VersionInfo(**fields)

    def test_empty_input(self):
        """An empty map splits into two empty maps."""
        versioned, unversioned = crawler.split_by_version_availability({})
        self.assertEqual(versioned, {})
        self.assertEqual(unversioned, {})

    def test_all_versioned(self):
        """Keys that all carry a version end up in the first partition only."""
        deps: crawler.CallDependencies = {
            self._make_info("pkg", "a", "1.0"): _leaf,
            self._make_info("pkg", "b", "2.0"): _leaf,
        }

        versioned, unversioned = crawler.split_by_version_availability(deps)
        self.assertEqual(len(versioned), 2)
        self.assertEqual(len(unversioned), 0)

    def test_all_unversioned(self):
        """Keys that all lack a version end up in the second partition only."""
        deps: crawler.CallDependencies = {
            self._make_info("local", "a"): _leaf,
            self._make_info("local", "b"): _leaf,
        }

        versioned, unversioned = crawler.split_by_version_availability(deps)
        self.assertEqual(len(versioned), 0)
        self.assertEqual(len(unversioned), 2)

    def test_mixed(self):
        """Each key lands in the partition matching its version availability."""
        with_version = self._make_info("pkg", "x", "3.1")
        without_version = self._make_info("local", "y")
        deps: crawler.CallDependencies = {
            with_version: _leaf,
            without_version: _single_call,
        }

        has, no = crawler.split_by_version_availability(deps)
        self.assertIn(with_version, has)
        self.assertIn(without_version, no)
        self.assertNotIn(with_version, no)
        self.assertNotIn(without_version, has)

    def test_partition_is_exhaustive_and_disjoint(self):
        """Every key in the input appears in exactly one partition."""
        infos = [
            self._make_info("pkg", "a", "1.0"),
            self._make_info("local", "b"),
            self._make_info("pkg", "c", "0.1"),
            self._make_info("local", "d"),
        ]
        deps: crawler.CallDependencies = dict.fromkeys(infos, _leaf)

        has, no = crawler.split_by_version_availability(deps)
        has_keys, no_keys = set(has), set(no)
        self.assertEqual(has_keys | no_keys, set(deps))
        self.assertTrue(has_keys.isdisjoint(no_keys))

    def test_version_none_vs_empty_string(self):
        """Only ``None`` counts as unversioned; an empty string is still 'versioned'."""
        none_version = self._make_info("local", "f", None)
        empty_version = self._make_info("local", "g", "")
        deps: crawler.CallDependencies = {
            none_version: _leaf,
            empty_version: _leaf,
        }

        has, no = crawler.split_by_version_availability(deps)
        self.assertIn(none_version, no)
        self.assertIn(empty_version, has)


# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
Loading