Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ jobs:
working-directory: ARC
run: |
echo "Running Unit Tests..."
export PYTEST_INLINE_DURATIONS=1
export PYTHONPATH="${{ github.workspace }}/AutoTST:${{ github.workspace }}/KinBot:$PYTHONPATH"
pytest arc/ --cov --cov-report=xml -ra -vv -n auto

Expand All @@ -104,6 +105,7 @@ jobs:
working-directory: ${{ github.workspace }}/ARC
run: |
echo "Running Functional Tests from $(pwd)..."
export PYTEST_INLINE_DURATIONS=1
export PYTHONPATH="${{ github.workspace }}/AutoTST:${{ github.workspace }}/KinBot:$PYTHONPATH"
pytest functional/ -ra -vv -n auto

Expand Down
72 changes: 58 additions & 14 deletions arc/family/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
"""

from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
from collections import OrderedDict
import ast
import hashlib
import os
import re

Expand All @@ -22,6 +24,13 @@

logger = get_logger()

_ENTRY_RE = re.compile(r'entry\((.*?)\)', re.DOTALL)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cool, thanks!
do any of the copilot comments make sense?

_ENTRY_LABEL_RE = re.compile(r'label = "(.*?)"')
_ENTRY_GROUP_RE = re.compile(r'group =(.*?)(?=\w+ =)', re.DOTALL)
_ENTRIES_CACHE_MAX = 32
_ENTRIES_CACHE: "OrderedDict[str, Dict[str, str]]" = OrderedDict()
_REACTION_FAMILY_CACHE: Dict[Tuple[str, bool], 'ReactionFamily'] = {}

Comment on lines +27 to +33
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR is described as adding pytest duration reporting in CI, but this file introduces additional behavior changes (regex precompilation plus global caches and get_reaction_family() usage) that appear unrelated to CI reporting. If these are intentional, they should be called out in the PR description; otherwise, consider splitting them into a separate PR to keep scope focused and reviewable.

Copilot uses AI. Check for mistakes.

def get_rmg_db_subpath(*parts: str, must_exist: bool = False) -> str:
"""Return a path under the RMG database, handling both source and packaged layouts."""
Expand Down Expand Up @@ -481,7 +490,7 @@ def determine_possible_reaction_products_from_family(rxn: 'ARCReaction',
and whether the family's template also represents its own reverse.
"""
product_dicts = list()
family = ReactionFamily(label=family_label, consider_arc_families=consider_arc_families)
family = get_reaction_family(label=family_label, consider_arc_families=consider_arc_families)
products = family.generate_products(reactants=rxn.get_reactants_and_products(return_copies=True)[0])
if products:
for group_labels, product_lists in products.items():
Expand Down Expand Up @@ -808,17 +817,8 @@ def get_entries(groups_as_lines: List[str],
Returns:
Dict[str, str]: The extracted entries, keys are the labels, values are the groups.
"""
groups_str = ''.join(groups_as_lines)
entries = re.findall(r'entry\((.*?)\)', groups_str, re.DOTALL)
specific_entries = dict()
for i, entry in enumerate(entries):
label_match = re.search(r'label = "(.*?)"', entry)
group_match = re.search(r'group =(.*?)(?=\w+ =)', entry, re.DOTALL)
if label_match is not None and group_match is not None and label_match.group(1) in entry_labels:
specific_entries[label_match.group(1)] = clean_text(group_match.group(1))
if i > 2000:
break
return specific_entries
entries_map = _get_entries_map(groups_as_lines)
return {label: entries_map[label] for label in entry_labels if label in entries_map}


def get_group_adjlist(groups_as_lines: List[str],
Expand All @@ -834,8 +834,52 @@ def get_group_adjlist(groups_as_lines: List[str],
Returns:
str: The extracted group.
"""
specific_entries = get_entries(groups_as_lines, entry_labels=[entry_label])
return specific_entries[entry_label]
entries_map = _get_entries_map(groups_as_lines)
return entries_map[entry_label]


def _get_entries_map(groups_as_lines: List[str]) -> Dict[str, str]:
cache_key = _fingerprint_lines(groups_as_lines)
cached = _ENTRIES_CACHE.get(cache_key)
if cached is not None:
_ENTRIES_CACHE.move_to_end(cache_key)
return cached
groups_str = ''.join(groups_as_lines)
entries_map: Dict[str, str] = {}
Comment on lines +841 to +848
Copy link

Copilot AI Jan 27, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_get_entries_map() caches by id(groups_as_lines) and only validates via a very weak (len, first, last) signature. This can return incorrect results if a different list instance later reuses the same id (possible after GC) and happens to match the signature, and _ENTRIES_CACHE will also grow without bound over time. Prefer caching by stable content (e.g., hash/tuple of lines or the groups file path + mtime) or use a bounded/LRU cache; alternatively remove this cache since get_reaction_family() now caches ReactionFamily instances.

Copilot uses AI. Check for mistakes.
for i, match in enumerate(_ENTRY_RE.finditer(groups_str)):
entry = match.group(1)
label_match = _ENTRY_LABEL_RE.search(entry)
group_match = _ENTRY_GROUP_RE.search(entry)
if label_match is not None and group_match is not None:
entries_map[label_match.group(1)] = clean_text(group_match.group(1))
if i > 2000:
break
_ENTRIES_CACHE[cache_key] = entries_map
_ENTRIES_CACHE.move_to_end(cache_key)
if len(_ENTRIES_CACHE) > _ENTRIES_CACHE_MAX:
_ENTRIES_CACHE.popitem(last=False)
return entries_map


def _fingerprint_lines(groups_as_lines: List[str]) -> str:
"""Create a stable fingerprint for a groups file represented as lines."""
hasher = hashlib.blake2b(digest_size=16)
for line in groups_as_lines:
hasher.update(line.encode('utf-8', errors='ignore'))
hasher.update(b'\0')
return hasher.hexdigest()


def get_reaction_family(label: str,
consider_arc_families: bool = True,
) -> 'ReactionFamily':
key = (label, consider_arc_families)
cached = _REACTION_FAMILY_CACHE.get(key)
if cached is not None:
return cached
family = ReactionFamily(label=label, consider_arc_families=consider_arc_families)
_REACTION_FAMILY_CACHE[key] = family
return family
Comment on lines +876 to +882
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_REACTION_FAMILY_CACHE stores ReactionFamily instances globally with no eviction. These objects hold groups_as_lines and derived data, so this can retain significant memory over a long-running process and makes behavior dependent on global cache state. Consider adding a bounded/LRU cache, an explicit cache clear mechanism, or limiting caching to call sites that can guarantee reuse (instead of caching unconditionally at module scope).

Copilot uses AI. Check for mistakes.


def get_isomorphic_subgraph(isomorphic_subgraph_1: dict,
Expand Down
13 changes: 13 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import os
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe put this in devtools? or under .actions?



def pytest_report_teststatus(report, config):
Comment thread Fixed
inline_durations = os.getenv("PYTEST_INLINE_DURATIONS") == "1"
if report.when == "call" and inline_durations:
if report.passed:
return "passed", ".", f"[{report.duration:.3f}s]"
if report.failed:
return "failed", "F", f"[{report.duration:.3f}s]"
if report.skipped:
return "skipped", "s", f"[{report.duration:.3f}s]"
return None