From a4ba17b7ffd438fcfcd7598cd0df78bb1f963e3e Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Sun, 19 Apr 2026 12:02:06 +0300 Subject: [PATCH 1/3] Add support for CABS in ORCA F12 calculations Introduces the `cabs` argument as the single source of truth for the Complementary Auxiliary Basis Set in ORCA. Previously, users had to pack CABS into the `auxiliary_basis` string. This change adds a dedicated field and implements a validation guard: if an F12 method is specified without a CABS basis, the adapter now raises a ValueError. This prevents ORCA from silently falling back to non-F12 energies (DimCABS = 0). Also refactors the ORCA adapter to use `setdefault` for handling keyword defaults and updates documentation to reflect the new level specification format. --- arc/job/adapters/orca.py | 34 +++++++++++++++++------------ arc/job/adapters/orca_test.py | 41 +++++++++++++++++++++++++++++++++++ docs/source/advanced.rst | 17 ++++++++++----- 3 files changed, 72 insertions(+), 20 deletions(-) diff --git a/arc/job/adapters/orca.py b/arc/job/adapters/orca.py index 71e6334f2c..01d0ceeef5 100644 --- a/arc/job/adapters/orca.py +++ b/arc/job/adapters/orca.py @@ -84,13 +84,14 @@ def _format_orca_basis(basis: str) -> str: # job_type_2: reserved for Opt + Freq. # restricted: 'R' = closed-shell SCF, 'U' = spin unrestricted SCF, 'RO' = open-shell spin restricted SCF # auxiliary_basis: required for DLPNO calculations (speed up calculation) +# cabs: Complementary Auxiliary Basis Set for F12 calculations (e.g., cc-pVTZ-F12-CABS) # memory: MB per core (must increase as system gets larger) # cpus: must be less than number of electron pairs, defaults to min(heavy atoms, cpus limit) # job_options_blocks: input blocks that enable detailed control over program # job_options_keywords: input keywords that control the job # method_class: 'HF' for wavefunction methods (hf, mp, cc, dlpno ...). 'KS' for DFT methods. # options: additional keywords to control job (e.g., TightSCF, NormalPNO ...) 
-input_template = """!${restricted}${method_class} ${method} ${basis} ${auxiliary_basis} ${keywords} +input_template = """!${restricted}${method_class} ${method} ${basis} ${auxiliary_basis}${cabs} ${keywords} !${job_type_1} ${job_type_2} %%maxcore ${memory} @@ -254,6 +255,12 @@ def write_input_file(self) -> None: """ Write the input file to execute the job on the server. """ + if 'f12' in self.level.method and not self.level.cabs: + raise ValueError( + f"Level '{self.level}' uses an F12 method without a CABS basis. " + f"Set `cabs:` in the level spec (e.g. cc-pVTZ-F12-CABS). " + f"Without it ORCA runs with DimCABS = 0 and returns non-F12 energies." + ) input_dict = dict() for key in ['block', 'scan', @@ -264,6 +271,7 @@ def write_input_file(self) -> None: input_dict[key] = '' input_dict['auxiliary_basis'] = _format_orca_basis(self.level.auxiliary_basis or '') input_dict['basis'] = _format_orca_basis(self.level.basis or '') + input_dict['cabs'] = f' {_format_orca_basis(self.level.cabs)}' if self.level.cabs else '' input_dict['charge'] = self.charge input_dict['cpus'] = self.cpu_cores input_dict['label'] = self.species_label @@ -272,30 +280,28 @@ def write_input_file(self) -> None: input_dict['multiplicity'] = self.multiplicity input_dict['xyz'] = xyz_to_str(self.xyz) - scf_convergence = self.args['keyword'].get('scf_convergence', '').lower() or \ - orca_default_options_dict['global']['keyword'].get('scf_convergence', '').lower() - if not scf_convergence: + self.args['keyword'].setdefault( + 'scf_convergence', + orca_default_options_dict['global']['keyword'].get('scf_convergence', '').lower()) + if not self.args['keyword']['scf_convergence']: raise ValueError('Orca SCF convergence is not specified. 
Please specify this variable either in ' 'settings.py as default or in the input file as additional options.') - self.add_to_args(val=scf_convergence, key1='keyword') # Orca requires different blocks for wavefunction methods and DFT methods if self.level.method_type == 'dft': input_dict['method_class'] = 'KS' - # DFT grid must be the same for both opt and freq - if self.fine: - self.add_to_args(val='defgrid3', key1='keyword') - else: - self.add_to_args(val='defgrid2', key1='keyword') + # DFT grid must be the same for both opt and freq. + # Users can override by setting `dft_grid` in args.keyword (e.g. dft_grid: DEFGRID1). + self.args['keyword'].setdefault('dft_grid', 'defgrid3' if self.fine else 'defgrid2') elif self.level.method_type == 'wavefunction': input_dict['method_class'] = 'HF' if 'dlpno' in self.level.method: - dlpno_threshold = self.args['keyword'].get('dlpno_threshold', '').lower() or \ - orca_default_options_dict['global']['keyword'].get('dlpno_threshold', '').lower() - if not dlpno_threshold: + self.args['keyword'].setdefault( + 'dlpno_threshold', + orca_default_options_dict['global']['keyword'].get('dlpno_threshold', '').lower()) + if not self.args['keyword']['dlpno_threshold']: raise ValueError('Orca DLPNO threshold is not specified. Please specify this variable either in ' 'settings.py as default or in the input file as additional options.') - self.add_to_args(val=dlpno_threshold, key1='keyword') else: logger.debug(f'Running {self.level.method_type} {self.level.method} method in Orca.') diff --git a/arc/job/adapters/orca_test.py b/arc/job/adapters/orca_test.py index 4b7725da51..f4c077efb4 100644 --- a/arc/job/adapters/orca_test.py +++ b/arc/job/adapters/orca_test.py @@ -188,6 +188,47 @@ def test_write_input_file_with_CPCM_solvation(self): """ self.assertEqual(content_3, job_3_expected_input_file) + def test_write_input_file_f12_with_cabs(self): + """F12 sp_level with a cabs basis emits the CABS token on the ! 
line.""" + job_f12 = OrcaAdapter(execution_type='queue', + job_type='sp', + level=Level(method='DLPNO-CCSD(T)-F12', + basis='cc-pVTZ-F12', + auxiliary_basis='aug-cc-pVTZ/C', + cabs='cc-pVTZ-F12-CABS'), + project='test_f12', + project_directory=os.path.join(ARC_TESTING_PATH, 'test_OrcaAdapter'), + species=[ARCSpecies(label='O_atom', smiles='[O]', + xyz='O 0.0 0.0 0.0')], + testing=True, + ) + job_f12.write_input_file() + with open(os.path.join(job_f12.local_path, input_filenames[job_f12.job_adapter]), 'r') as f: + content = f.read() + bang_line = content.splitlines()[0] + self.assertIn('dlpno-ccsd(t)-f12', bang_line) + self.assertIn('cc-pvtz-f12', bang_line) + self.assertIn('aug-cc-pvtz/c', bang_line) + self.assertIn('cc-pvtz-f12-cabs', bang_line) + + def test_write_input_file_f12_without_cabs_raises(self): + """F12 sp_level without a cabs basis raises at input-file generation.""" + # _initialize_adapter calls set_files() which calls write_input_file(), + # so the guard fires during OrcaAdapter construction — wrap the whole + # thing in assertRaises. 
+ with self.assertRaises(ValueError): + OrcaAdapter(execution_type='queue', + job_type='sp', + level=Level(method='DLPNO-CCSD(T)-F12', + basis='cc-pVTZ-F12', + auxiliary_basis='aug-cc-pVTZ/C'), + project='test_f12_bad', + project_directory=os.path.join(ARC_TESTING_PATH, 'test_OrcaAdapter'), + species=[ARCSpecies(label='O_atom', smiles='[O]', + xyz='O 0.0 0.0 0.0')], + testing=True, + ) + def test_format_orca_method(self): """Test ORCA method formatting helper.""" self.assertEqual(_format_orca_method('wb97xd3'), 'wb97x-d3') diff --git a/docs/source/advanced.rst b/docs/source/advanced.rst index 091fa90b41..505c9c72ea 100644 --- a/docs/source/advanced.rst +++ b/docs/source/advanced.rst @@ -88,13 +88,17 @@ Another example:: sp_level: {'method': 'DLPNO-CCSD(T)-F12', 'basis': 'cc-pVTZ-F12', - 'auxiliary_basis': 'aug-cc-pVTZ/C cc-pVTZ-F12-CABS', + 'auxiliary_basis': 'aug-cc-pVTZ/C', + 'cabs': 'cc-pVTZ-F12-CABS', 'args': {'keyword' :{'opt_convergence': 'TightOpt'}}, 'software': 'orca', } -specifies ``DLPNO-CCSD(T)-F12/cc-pVTZ-F12`` model chemistry along with two auxiliary basis sets, -``aug-cc-pVTZ/C`` and ``cc-pVTZ-F12-CABS``, with ``TightOpt`` for a single point energy calculation. +specifies ``DLPNO-CCSD(T)-F12/cc-pVTZ-F12`` model chemistry along with an +auxiliary basis ``aug-cc-pVTZ/C`` and a complementary auxiliary basis (CABS) +``cc-pVTZ-F12-CABS``, with ``TightOpt`` for a single point energy calculation. +The ``cabs`` argument is the single source of truth for F12 complementary +auxiliary basis sets; do not pack the CABS token into ``auxiliary_basis``. You can also provide a 4-digit ``year`` on ``arkane_level_of_theory`` to distinguish method variants in the Arkane database (e.g., ``b97d3`` vs ``b97d32023``):: @@ -118,9 +122,10 @@ The following are examples for **equivalent** definitions:: conformer_opt_level = {'method': 'PM6'} -Note that the ``cabs`` and ``solvation_scheme_level`` arguments currently have no effect -and will be implemented in future versions. 
The ``software`` argument is automatically determined -unless specified by the user. +Note that the ``solvation_scheme_level`` argument currently has no effect and +will be implemented in future versions. The ``cabs`` argument is consumed by +the ORCA and Molpro adapters for F12 calculations; it is ignored by other ESS. +The ``software`` argument is automatically determined unless specified by the user. ARC also supports an additional shortcut argument, ``level_of_theory``, to simultaneously specify ``opt_level``, ``freq_level``, ``sp_level``, and ``scan_level``. From 53aeb68431339f62d15e161732db151031036ab3 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Sun, 19 Apr 2026 12:02:47 +0300 Subject: [PATCH 2/3] Refine DLPNO fallback logic for monoatomic species Limits the monoatomic special case for DLPNO methods to single-electron atoms (H, D, T), which still fall back to HF. Heavier monoatomic species like O or N can be handled by DLPNO in ORCA, so they are no longer switched to the canonical method with the "dlpno-" prefix stripped. Additionally, the HF fallback now preserves Level attributes like CABS and auxiliary basis sets by using the `as_dict()` representation, ensuring consistency with recent ORCA F12 enhancements. 
--- arc/scheduler.py | 19 +++++++++---------- arc/scheduler_test.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/arc/scheduler.py b/arc/scheduler.py index c56fae7d72..e5830f3aa4 100644 --- a/arc/scheduler.py +++ b/arc/scheduler.py @@ -1444,16 +1444,15 @@ def run_sp_job(self, level_of_theory='ccsd/cc-pvdz', job_type='sp') return - if self.species_dict[label].is_monoatomic() and 'dlpno' in level.method: - species = self.species_dict[label] - if species.mol.atoms[0].element.symbol in ('H', 'D', 'T'): - logger.info(f'Using HF/{level.basis} for {label} (single electron, no correlation).') - level = Level(method='hf', basis=level.basis, software=level.software, args=level.args) - else: - canonical_method = level.method.replace('dlpno-', '') - logger.info(f'DLPNO methods are incompatible with monoatomic species {label}. ' - f'Using {canonical_method}/{level.basis} instead.') - level = Level(method=canonical_method, basis=level.basis, software=level.software, args=level.args) + if self.species_dict[label].is_monoatomic() and 'dlpno' in level.method \ + and self.species_dict[label].mol.atoms[0].element.symbol in ('H', 'D', 'T'): + # DLPNO needs electron pairs; fall back to HF for single-electron atoms only. + # Heavier monoatomics (e.g. [O], [N]) run DLPNO fine in ORCA and are left alone. 
+ logger.info(f'Using HF/{level.basis} for {label} (single electron, no correlation).') + level_dict = level.as_dict() + level_dict.pop('method_type', None) # re-deduce after method change + level_dict['method'] = 'hf' + level = Level(repr=level_dict) if self.job_types['sp']: if self.species_dict[label].multi_species: if self.output_multi_spc[self.species_dict[label].multi_species].get('sp', False): diff --git a/arc/scheduler_test.py b/arc/scheduler_test.py index cdc4b17f07..f272478464 100644 --- a/arc/scheduler_test.py +++ b/arc/scheduler_test.py @@ -1005,6 +1005,34 @@ def test_switch_ts_rotors_reset(self, mock_run_opt): # rotors_dict=None must be preserved — do not re-enable rotor scans. self.assertIsNone(sched2.species_dict[ts_label2].rotors_dict) + @patch('arc.scheduler.Scheduler.run_job') + def test_run_sp_monoatomic_dlpno(self, mock_run_job): + """Monoatomic H falls back to HF; heavier atoms (O) keep DLPNO intact.""" + dlpno_level = Level(method='DLPNO-CCSD(T)-F12', basis='cc-pVTZ-F12', + auxiliary_basis='aug-cc-pVTZ/C', cabs='cc-pVTZ-F12-CABS', + software='orca') + + for label, smiles in [('H_atom', '[H]'), ('O_atom', '[O]')]: + self.sched1.species_dict[label] = ARCSpecies(label=label, smiles=smiles) + self.sched1.job_dict[label] = {} + self.sched1.output[label] = {'paths': {}, 'job_types': {}, + 'errors': '', 'warnings': '', 'conformers': ''} + + # Single-electron atom → HF fallback, aux/cabs preserved. + self.sched1.run_sp_job(label='H_atom', level=dlpno_level) + h_level = mock_run_job.call_args.kwargs['level_of_theory'] + self.assertEqual(h_level.method, 'hf') + self.assertEqual(h_level.basis, 'cc-pvtz-f12') + self.assertEqual(h_level.auxiliary_basis, 'aug-cc-pvtz/c') + self.assertEqual(h_level.cabs, 'cc-pvtz-f12-cabs') + + # Heavier monoatomic → DLPNO level unchanged. 
+ mock_run_job.reset_mock() + self.sched1.run_sp_job(label='O_atom', level=dlpno_level) + o_level = mock_run_job.call_args.kwargs['level_of_theory'] + self.assertEqual(o_level.method, 'dlpno-ccsd(t)-f12') + self.assertEqual(o_level.cabs, 'cc-pvtz-f12-cabs') + @classmethod def tearDownClass(cls): """ From c99d25483d055f35b098b557278d8984f665b100 Mon Sep 17 00:00:00 2001 From: Calvin Pieters Date: Sun, 19 Apr 2026 12:03:27 +0300 Subject: [PATCH 3/3] Refine DLPNO monoatomic logic and update CABS test coverage Restricts the DLPNO incompatibility check in ORCA to single-electron species only, as heavier monoatomic species are supported. Additionally, updates the test suite to reflect the transition of the CABS basis from the auxiliary basis string to its own dedicated attribute. --- arc/job/trsh.py | 4 ++-- arc/main_test.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arc/job/trsh.py b/arc/job/trsh.py index f1878a7011..27d606f532 100644 --- a/arc/job/trsh.py +++ b/arc/job/trsh.py @@ -1018,8 +1018,8 @@ def trsh_ess_job(label: str, couldnt_trsh = True elif 'orca' in software: - if 'dlpno' in level_of_theory.method and (is_monoatomic or is_h): - raise TrshError(f'DLPNO methods are incompatible with monoatomic species {label} in Orca. ' + if 'dlpno' in level_of_theory.method and is_h: + raise TrshError(f'DLPNO methods are incompatible with single-electron species {label} in Orca. ' f'This should have been caught by the Scheduler before job submission.') elif 'Memory' in job_status['keywords']: # Increase memory allocation. 
diff --git a/arc/main_test.py b/arc/main_test.py index 4034c55cd5..a5a5b81bd1 100644 --- a/arc/main_test.py +++ b/arc/main_test.py @@ -298,14 +298,14 @@ def test_determine_model_chemistry_for_job_types(self): freq_level={'method': 'B3LYP/G', 'basis': 'cc-pVDZ(fi/sf/fw)', 'auxiliary_basis': 'def2-svp/C', 'dispersion': 'DEF2-tzvp/c'}, sp_level={'method': 'DLPNO-CCSD(T)-F12', 'basis': 'cc-pVTZ-F12', - 'auxiliary_basis': 'aug-cc-pVTZ/C cc-pVTZ-F12-CABS'}, + 'auxiliary_basis': 'aug-cc-pVTZ/C', 'cabs': 'cc-pVTZ-F12-CABS'}, calc_freq_factor=False, compute_thermo=False) self.assertEqual(arc9.opt_level.simple(), 'wb97xd/def2tzvp') self.assertEqual(str(arc9.freq_level), 'b3lyp/g/cc-pvdz(fi/sf/fw), auxiliary_basis: def2-svp/c, ' 'dispersion: def2-tzvp/c, software: gaussian') self.assertEqual(str(arc9.sp_level), - 'dlpno-ccsd(t)-f12/cc-pvtz-f12, auxiliary_basis: aug-cc-pvtz/c cc-pvtz-f12-cabs, ' - 'software: orca') + 'dlpno-ccsd(t)-f12/cc-pvtz-f12, auxiliary_basis: aug-cc-pvtz/c, ' + 'cabs: cc-pvtz-f12-cabs, software: orca') # Test using default frequency and orbital level for composite job, also forbid rotors job arc10 = ARC(project='test', composite_method='cbs-qb3', calc_freq_factor=False,