Skip to content
Draft
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ jobs:
working-directory: ARC
run: |
echo "Running Unit Tests..."
export PYTEST_INLINE_DURATIONS=1
export PYTHONPATH="${{ github.workspace }}/AutoTST:${{ github.workspace }}/KinBot:$PYTHONPATH"
pytest arc/ --cov --cov-report=xml -ra -vv -n auto

Expand All @@ -104,6 +105,7 @@ jobs:
working-directory: ${{ github.workspace }}/ARC
run: |
echo "Running Functional Tests from $(pwd)..."
export PYTEST_INLINE_DURATIONS=1
export PYTHONPATH="${{ github.workspace }}/AutoTST:${{ github.workspace }}/KinBot:$PYTHONPATH"
pytest functional/ -ra -vv -n auto

Expand Down
72 changes: 58 additions & 14 deletions arc/family/family.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
"""

from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union
from collections import OrderedDict
import ast
import hashlib
import os
import re

Expand All @@ -22,6 +24,13 @@

logger = get_logger()

_ENTRY_RE = re.compile(r'entry\((.*?)\)', re.DOTALL)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cool, thanks!
do any of the copilot comments make sense?

_ENTRY_LABEL_RE = re.compile(r'label = "(.*?)"')
_ENTRY_GROUP_RE = re.compile(r'group =(.*?)(?=\w+ =)', re.DOTALL)
_ENTRIES_CACHE_MAX = 32
_ENTRIES_CACHE: "OrderedDict[str, Dict[str, str]]" = OrderedDict()
_REACTION_FAMILY_CACHE: Dict[Tuple[str, bool], 'ReactionFamily'] = {}

Comment on lines +27 to +33
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This PR is described as adding pytest duration reporting in CI, but this file introduces additional behavior changes (regex precompilation plus global caches and get_reaction_family() usage) that appear unrelated to CI reporting. If these are intentional, they should be called out in the PR description; otherwise, consider splitting them into a separate PR to keep scope focused and reviewable.

Copilot uses AI. Check for mistakes.

def get_rmg_db_subpath(*parts: str, must_exist: bool = False) -> str:
"""Return a path under the RMG database, handling both source and packaged layouts."""
Expand Down Expand Up @@ -481,7 +490,7 @@ def determine_possible_reaction_products_from_family(rxn: 'ARCReaction',
and whether the family's template also represents its own reverse.
"""
product_dicts = list()
family = ReactionFamily(label=family_label, consider_arc_families=consider_arc_families)
family = get_reaction_family(label=family_label, consider_arc_families=consider_arc_families)
products = family.generate_products(reactants=rxn.get_reactants_and_products(return_copies=True)[0])
if products:
for group_labels, product_lists in products.items():
Expand Down Expand Up @@ -808,17 +817,8 @@ def get_entries(groups_as_lines: List[str],
Returns:
Dict[str, str]: The extracted entries, keys are the labels, values are the groups.
"""
groups_str = ''.join(groups_as_lines)
entries = re.findall(r'entry\((.*?)\)', groups_str, re.DOTALL)
specific_entries = dict()
for i, entry in enumerate(entries):
label_match = re.search(r'label = "(.*?)"', entry)
group_match = re.search(r'group =(.*?)(?=\w+ =)', entry, re.DOTALL)
if label_match is not None and group_match is not None and label_match.group(1) in entry_labels:
specific_entries[label_match.group(1)] = clean_text(group_match.group(1))
if i > 2000:
break
return specific_entries
entries_map = _get_entries_map(groups_as_lines)
return {label: entries_map[label] for label in entry_labels if label in entries_map}


def get_group_adjlist(groups_as_lines: List[str],
Expand All @@ -834,8 +834,52 @@ def get_group_adjlist(groups_as_lines: List[str],
Returns:
str: The extracted group.
"""
specific_entries = get_entries(groups_as_lines, entry_labels=[entry_label])
return specific_entries[entry_label]
entries_map = _get_entries_map(groups_as_lines)
return entries_map[entry_label]


def _get_entries_map(groups_as_lines: List[str]) -> Dict[str, str]:
    """
    Parse all ``entry(...)`` blocks of a groups file and map entry labels to cleaned group strings.

    Results are memoized in a small LRU cache (``_ENTRIES_CACHE``) keyed by a content
    fingerprint of the lines, so repeated lookups against the same groups file avoid
    re-running the regex scan.

    Args:
        groups_as_lines (List[str]): The groups file content, as a list of lines.

    Returns:
        Dict[str, str]: Keys are entry labels, values are the cleaned group definitions.
    """
    cache_key = _fingerprint_lines(groups_as_lines)
    cached = _ENTRIES_CACHE.get(cache_key)
    if cached is not None:
        # Cache hit: refresh recency so frequently-used files survive eviction.
        _ENTRIES_CACHE.move_to_end(cache_key)
        return cached
    groups_str = ''.join(groups_as_lines)
    entries_map: Dict[str, str] = {}
    for i, match in enumerate(_ENTRY_RE.finditer(groups_str)):
        entry = match.group(1)
        label_match = _ENTRY_LABEL_RE.search(entry)
        group_match = _ENTRY_GROUP_RE.search(entry)
        if label_match is not None and group_match is not None:
            entries_map[label_match.group(1)] = clean_text(group_match.group(1))
        if i > 2000:
            # Safety cap against pathological/huge files (kept from the original scan logic).
            break
    # A fresh insertion lands at the MRU end of the OrderedDict, so no move_to_end is needed.
    _ENTRIES_CACHE[cache_key] = entries_map
    if len(_ENTRIES_CACHE) > _ENTRIES_CACHE_MAX:
        # Evict the least-recently-used entry to keep the cache bounded.
        _ENTRIES_CACHE.popitem(last=False)
    return entries_map


def _fingerprint_lines(groups_as_lines: List[str]) -> str:
"""Create a stable fingerprint for a groups file represented as lines."""
hasher = hashlib.blake2b(digest_size=16)
for line in groups_as_lines:
hasher.update(line.encode('utf-8', errors='ignore'))
hasher.update(b'\0')
return hasher.hexdigest()


def get_reaction_family(label: str,
                        consider_arc_families: bool = True,
                        ) -> 'ReactionFamily':
    """
    Return a cached ReactionFamily instance for the given family label.

    Instances are memoized per ``(label, consider_arc_families)`` key, since
    constructing a ReactionFamily re-reads and re-parses the family's groups file.
    The cache is bounded so a long-running process does not retain every family
    (and its groups data) indefinitely.

    Args:
        label (str): The reaction family label.
        consider_arc_families (bool, optional): Whether ARC's own families may be considered.

    Returns:
        ReactionFamily: The corresponding reaction family object.
    """
    key = (label, consider_arc_families)
    cached = _REACTION_FAMILY_CACHE.get(key)
    if cached is not None:
        return cached
    family = ReactionFamily(label=label, consider_arc_families=consider_arc_families)
    _REACTION_FAMILY_CACHE[key] = family
    if len(_REACTION_FAMILY_CACHE) > 32:
        # Bound the cache: dicts preserve insertion order, so drop the oldest entry (FIFO).
        del _REACTION_FAMILY_CACHE[next(iter(_REACTION_FAMILY_CACHE))]
    return family
Comment on lines +876 to +882
Copy link

Copilot AI Jan 26, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_REACTION_FAMILY_CACHE stores ReactionFamily instances globally with no eviction. These objects hold groups_as_lines and derived data, so this can retain significant memory over a long-running process and makes behavior dependent on global cache state. Consider adding a bounded/LRU cache, an explicit cache clear mechanism, or limiting caching to call sites that can guarantee reuse (instead of caching unconditionally at module scope).

Copilot uses AI. Check for mistakes.


def get_isomorphic_subgraph(isomorphic_subgraph_1: dict,
Expand Down
13 changes: 13 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
import os
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe put this in devtools? or under .actions?



def pytest_report_teststatus(report, config):
    """Append each test's call duration to its status word when PYTEST_INLINE_DURATIONS=1.

    Returns a ``(category, shortletter, verbose_word)`` tuple for the call phase,
    or None to fall back to pytest's default reporting.
    """
    # Only decorate the 'call' phase, and only when explicitly enabled via the env var.
    if report.when != "call" or os.getenv("PYTEST_INLINE_DURATIONS") != "1":
        return None
    for outcome, letter, word in (("passed", ".", "PASSED"),
                                  ("failed", "F", "FAILED"),
                                  ("skipped", "s", "SKIPPED")):
        if getattr(report, outcome):
            return outcome, letter, f"{word} [{report.duration:.3f}s]"
    return None
27 changes: 17 additions & 10 deletions functional/restart_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@
from arc.main import ARC


def _worker_suffix() -> str:
"""Use the xdist worker id to isolate per-worker project directories."""
worker = os.getenv('PYTEST_XDIST_WORKER')
return f'_{worker}' if worker else ''


class TestRestart(unittest.TestCase):
"""
Contains unit tests for restarting ARC.
Expand All @@ -36,7 +42,7 @@ def test_restart_thermo(self):
"""
restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '1_restart_thermo')
restart_path = os.path.join(restart_dir, 'restart.yml')
project = 'arc_project_for_testing_delete_after_usage_restart_thermo'
project = f'arc_project_for_testing_delete_after_usage_restart_thermo{_worker_suffix()}'
project_directory = os.path.join(ARC_PATH, 'Projects', project)
os.makedirs(os.path.dirname(project_directory), exist_ok=True)
shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs', 'Species'), dirs_exist_ok=True)
Expand All @@ -55,7 +61,7 @@ def test_restart_thermo(self):
break
self.assertTrue(thermo_dft_ccsdtf12_bac)

with open(os.path.join(project_directory, 'arc_project_for_testing_delete_after_usage_restart_thermo.info'), 'r') as f:
with open(os.path.join(project_directory, f'{project}.info'), 'r') as f:
sts, n2h3, oet, lot, ap = False, False, False, False, False
for line in f.readlines():
if 'Considered the following species and TSs:' in line:
Expand Down Expand Up @@ -133,7 +139,7 @@ def test_restart_rate_1(self):
"""Test restarting ARC and attaining a reaction rate coefficient"""
restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '2_restart_rate')
restart_path = os.path.join(restart_dir, 'restart.yml')
project = 'arc_project_for_testing_delete_after_usage_restart_rate_1'
project = f'arc_project_for_testing_delete_after_usage_restart_rate_1{_worker_suffix()}'
project_directory = os.path.join(ARC_PATH, 'Projects', project)
os.makedirs(os.path.dirname(project_directory), exist_ok=True)
shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True)
Expand All @@ -154,7 +160,7 @@ def test_restart_rate_1(self):

def test_restart_rate_2(self):
"""Test restarting ARC and attaining a reaction rate coefficient"""
project = 'arc_project_for_testing_delete_after_usage_restart_rate_2'
project = f'arc_project_for_testing_delete_after_usage_restart_rate_2{_worker_suffix()}'
project_directory = os.path.join(ARC_PATH, 'Projects', project)
base_path = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '5_TS1')
restart_path = os.path.join(base_path, 'restart.yml')
Expand Down Expand Up @@ -183,7 +189,7 @@ def test_restart_bde (self):
"""Test restarting ARC and attaining a BDE for anilino_radical."""
restart_dir = os.path.join(ARC_PATH, 'arc', 'testing', 'restart', '3_restart_bde')
restart_path = os.path.join(restart_dir, 'restart.yml')
project = 'test_restart_bde'
project = f'test_restart_bde{_worker_suffix()}'
project_directory = os.path.join(ARC_PATH, 'Projects', project)
os.makedirs(os.path.dirname(project_directory), exist_ok=True)
shutil.copytree(os.path.join(restart_dir, 'calcs'), os.path.join(project_directory, 'calcs'), dirs_exist_ok=True)
Expand All @@ -192,7 +198,7 @@ def test_restart_bde (self):
arc1 = ARC(**input_dict)
arc1.execute()

report_path = os.path.join(ARC_PATH, 'Projects', 'test_restart_bde', 'output', 'BDE_report.txt')
report_path = os.path.join(ARC_PATH, 'Projects', project, 'output', 'BDE_report.txt')
with open(report_path, 'r') as f:
lines = f.readlines()
self.assertIn(' BDE report for anilino_radical:\n', lines)
Expand All @@ -218,10 +224,11 @@ def tearDownClass(cls):
A function that is run ONCE after all unit tests in this class.
Delete all project directories created during these unit tests
"""
projects = ['arc_project_for_testing_delete_after_usage_restart_thermo',
'arc_project_for_testing_delete_after_usage_restart_rate_1',
'arc_project_for_testing_delete_after_usage_restart_rate_2',
'test_restart_bde',
suffix = _worker_suffix()
projects = [f'arc_project_for_testing_delete_after_usage_restart_thermo{suffix}',
f'arc_project_for_testing_delete_after_usage_restart_rate_1{suffix}',
f'arc_project_for_testing_delete_after_usage_restart_rate_2{suffix}',
f'test_restart_bde{suffix}',
]
for project in projects:
project_directory = os.path.join(ARC_PATH, 'Projects', project)
Expand Down
Loading