Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
Show all changes
69 commits
Select commit Hold shift + click to select a range
a68f2ae
Add test file
sdruskat Nov 21, 2025
95288a2
added first e2e harvest test for SoftwareMetadata
Jan 12, 2026
4920090
added more tests
Jan 16, 2026
38ef40e
refactored end to end tests
Jan 16, 2026
ddcd26a
updated creation of SoftwareMetadata objects in e2e tests
Jan 19, 2026
9b44b53
added tests and support for curate step
Jan 19, 2026
6c3ba13
started to add support for deposit step and added useful method for S…
Jan 19, 2026
feeb16b
worked on invenio deposit
Jan 23, 2026
4f6cc9d
Merge branch 'refactor/data-model' into feature/454-e2e-deposit
Jan 26, 2026
ed0916b
fixed bugs in invenio.py
Jan 26, 2026
382e2c3
fixed bug and adjusted tests
Jan 30, 2026
96861ec
adjusted invenio.py and its test a bit
Feb 2, 2026
248ae33
added adjusted files from feature/153-refactor-datamodel for process
Feb 6, 2026
ebebca4
added first tests
Feb 6, 2026
f21df49
(re)added version and help commands to the available commands
Feb 9, 2026
d4d9ca8
made test for process step more complex
Feb 9, 2026
7cfa7bc
made process step and ld_container._to_expanded_json more robust
Feb 9, 2026
520ef39
improved flake8 rating
Feb 9, 2026
8606933
Merge branch 'refactor/423-implement-public-api' into feature/454-e2e…
SKernchen Feb 13, 2026
d04b0e2
Remove SysExit call
SKernchen Feb 13, 2026
13a0952
Fix typo
SKernchen Feb 13, 2026
bcdc821
added lots of comments and fixed small inconsistencies
Feb 13, 2026
1c10dca
added comments and fixed small bug
Feb 27, 2026
aa4284e
removed unnecessary print statements
Feb 27, 2026
4080091
json_ids are now returned as ld_dicts instead of the id string
Mar 3, 2026
b7543ee
reworked merging and added strategies
Mar 5, 2026
08619ee
fixed formation errors
Mar 5, 2026
ac36a28
updated strategies and match functions
Mar 6, 2026
3291c4d
formatting and doc strings
Mar 9, 2026
7365526
Merge branch 'feature/454-e2e-test-plugin-api' into feature/454-e2e-c…
notactuallyfinn Mar 9, 2026
62f1345
Merge pull request #468 from softwarepub/feature/454-e2e-curate
notactuallyfinn Mar 9, 2026
1e781ca
fixed mistake in merge
Mar 9, 2026
2878155
Merge pull request #470 from softwarepub/feature/454-e2e-process
notactuallyfinn Mar 9, 2026
57cdfb0
Merge branch 'feature/454-e2e-test-plugin-api' into feature/454-e2e-d…
notactuallyfinn Mar 9, 2026
e5065c2
Merge pull request #469 from softwarepub/feature/454-e2e-deposit
notactuallyfinn Mar 9, 2026
0e8b49c
fixed minor bugs
Mar 9, 2026
bd7e732
adapted postprocess and added test
Mar 10, 2026
a30d385
changed imports and fixed syntax error for python 3.10
Mar 11, 2026
50c58d3
Merge pull request #475 from softwarepub/feature/454-e2e-postprocess
notactuallyfinn Mar 11, 2026
230ee05
fixed syntax error and added lost dependency
Mar 11, 2026
1fcbb4b
adjusted comments and config for doc build to try get it running
Mar 11, 2026
65f3d10
tweaked documentation a bit
Mar 13, 2026
4f3372c
tweaked documentation of ld_list
Mar 13, 2026
45459fc
improved comments for ld_dict
Mar 13, 2026
8afb37b
split test_api_e2e.py into multiple files
Mar 13, 2026
4a08fbe
added and updated comments
Mar 13, 2026
1d1c18d
commented the rest of the new files
Mar 16, 2026
02340e0
added support for user defined merge strategies
Mar 16, 2026
5e296cb
added class to ignore list for autoapi
Mar 18, 2026
630dd00
updated documentation for plugin development, made curate pluginizabl…
Mar 19, 2026
296fbb7
fixed formatting errors
Mar 19, 2026
758dbde
reactivated tests and added a few log calls
Mar 20, 2026
7de8c7d
applied simple suggestions
Mar 20, 2026
ad63e5a
fixed misspelled class name
Mar 23, 2026
7a8e8ae
implemented suggestions and fixed bug
Mar 23, 2026
9d3dc7f
added test_case and generate strategies automatically
Mar 25, 2026
c699ae2
finished implementation of CodemetaProcessPlugin
Mar 26, 2026
d514c9f
implement more comments
Mar 26, 2026
ba8b549
add codemeta_doi postprocess plugin
Mar 26, 2026
9b1c48a
reworked zenodo_sandbox_auth for cli testing
Mar 26, 2026
55e086d
fix post process and add license header to conftest.py
Mar 26, 2026
9d770a1
improve error handling of commands
Mar 26, 2026
180cc10
make process more verbose for errors while merging
Mar 26, 2026
0251cd1
potentially fixed error where multiple record ids are hallucinated.
Mar 26, 2026
73467f5
flake8
Mar 26, 2026
34877b9
adjusted logging a bit
Mar 27, 2026
a0c0005
fix tests that are affected by error handling update
Mar 27, 2026
279e672
added another process test and fixed small bug
Mar 27, 2026
65399be
flake8
Mar 27, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions src/hermes/commands/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
# "unused import" errors.
# flake8: noqa

from hermes.commands.base import HermesHelpCommand
from hermes.commands.base import HermesVersionCommand
from hermes.commands.clean.base import HermesCleanCommand
from hermes.commands.init.base import HermesInitCommand
from hermes.commands.curate.base import HermesCurateCommand
# from hermes.commands.base import HermesHelpCommand
# from hermes.commands.base import HermesVersionCommand
# from hermes.commands.clean.base import HermesCleanCommand
# from hermes.commands.init.base import HermesInitCommand
# from hermes.commands.curate.base import HermesCurateCommand
from hermes.commands.harvest.base import HermesHarvestCommand
from hermes.commands.process.base import HermesProcessCommand
from hermes.commands.deposit.base import HermesDepositCommand
from hermes.commands.postprocess.base import HermesPostprocessCommand
# from hermes.commands.process.base import HermesProcessCommand
# from hermes.commands.deposit.base import HermesDepositCommand
# from hermes.commands.postprocess.base import HermesPostprocessCommand
76 changes: 25 additions & 51 deletions src/hermes/commands/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@
import logging
import pathlib
from importlib import metadata
from typing import Dict, Optional, Type
from typing import Type, Union

import toml
from pydantic import BaseModel
from pydantic_settings import BaseSettings, SettingsConfigDict


class _HermesSettings(BaseSettings):
class HermesSettings(BaseSettings):
"""Root class for HERMES configuration model."""

model_config = SettingsConfigDict(env_file_encoding='utf-8')

logging: Dict = {}
logging: dict = {}


class HermesCommand(abc.ABC):
Expand All @@ -31,7 +31,7 @@ class HermesCommand(abc.ABC):
"""

command_name: str = ""
settings_class: Type = _HermesSettings
settings_class: Type = HermesSettings

def __init__(self, parser: argparse.ArgumentParser):
"""Initialize a new instance of any HERMES command.
Expand All @@ -45,28 +45,27 @@ def __init__(self, parser: argparse.ArgumentParser):
self.log = logging.getLogger(f"hermes.{self.command_name}")
self.errors = []

@classmethod
def init_plugins(cls):
def init_plugins(self):
"""Collect and initialize the plugins available for the HERMES command."""

# Collect all entry points for this group (i.e., all valid plug-ins for the step)
entry_point_group = f"hermes.{cls.command_name}"
group_plugins = {
entry_point.name: entry_point.load()
for entry_point in metadata.entry_points(group=entry_point_group)
}

# Collect the plug-in specific configurations
cls.derive_settings_class({
plugin_name: plugin_class.settings_class
for plugin_name, plugin_class in group_plugins.items()
if hasattr(plugin_class, "settings_class") and plugin_class.settings_class is not None
})
entry_point_group = f"hermes.{self.command_name}"
group_plugins = {}
group_settings = {}

for entry_point in metadata.entry_points(group=entry_point_group):
plugin_cls = entry_point.load()

group_plugins[entry_point.name] = plugin_cls
if hasattr(plugin_cls, 'settings_class') and plugin_cls.settings_class is not None:
group_settings[entry_point.name] = plugin_cls.settings_class

self.derive_settings_class(group_settings)

return group_plugins

@classmethod
def derive_settings_class(cls, setting_types: Dict[str, Type]) -> None:
def derive_settings_class(cls, setting_types: dict[str, Type]) -> None:
"""Build a new Pydantic data model class for configuration.

This will create a new class that includes all settings from the plugins available.
Expand Down Expand Up @@ -131,13 +130,10 @@ def init_command_parser(self, command_parser: argparse.ArgumentParser) -> None:

def load_settings(self, args: argparse.Namespace):
"""Load settings from the configuration file (passed in from command line)."""
try:
toml_data = toml.load(args.path / args.config)
self.root_settings = HermesCommand.settings_class.model_validate(toml_data)
self.settings = getattr(self.root_settings, self.command_name)
except FileNotFoundError as e:
self.log.error("hermes.toml was not found. Try to run 'hermes init' first or create one manually.")
raise e # This will lead to our default error message & sys.exit

toml_data = toml.load(args.path / args.config)
self.root_settings = HermesCommand.settings_class.model_validate(toml_data)
self.settings = getattr(self.root_settings, self.command_name)

def patch_settings(self, args: argparse.Namespace):
"""Process command line options for the settings."""
Expand All @@ -164,7 +160,9 @@ def __call__(self, args: argparse.Namespace):
class HermesPlugin(abc.ABC):
"""Base class for all HERMES plugins."""

settings_class: Optional[Type] = None
pluing_node = None

settings_class: Union[Type, None] = None

@abc.abstractmethod
def __call__(self, command: HermesCommand) -> None:
Expand Down Expand Up @@ -202,27 +200,3 @@ def __call__(self, args: argparse.Namespace) -> None:
# Otherwise, simply show the general help and exit (cleanly).
self.parser.print_help()
self.parser.exit()

def load_settings(self, args: argparse.Namespace):
"""No settings are needed for the help command."""
pass


class HermesVersionSettings(BaseModel):
"""Intentionally empty settings class for the version command."""
pass


class HermesVersionCommand(HermesCommand):
"""Show HERMES version and exit."""

command_name = "version"
settings_class = HermesVersionSettings

def load_settings(self, args: argparse.Namespace):
"""Pass loading settings as not necessary for this command."""
pass

def __call__(self, args: argparse.Namespace) -> None:
self.log.info(metadata.version("hermes"))
self.parser.exit()
24 changes: 13 additions & 11 deletions src/hermes/commands/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
import sys

from hermes import logger
from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand,
HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand,
HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand)
# FIXME: re-enable the commented-out imports below once the new implementation of the modules is available
# from hermes.commands import (HermesHelpCommand, HermesVersionCommand, HermesCleanCommand,
# HermesHarvestCommand, HermesProcessCommand, HermesCurateCommand,
# HermesDepositCommand, HermesPostprocessCommand, HermesInitCommand)
from hermes.commands import HermesHarvestCommand
from hermes.commands.base import HermesCommand


Expand All @@ -36,15 +38,15 @@ def main() -> None:
setting_types = {}

for command in (
HermesHelpCommand(parser),
HermesVersionCommand(parser),
HermesInitCommand(parser),
HermesCleanCommand(parser),
# HermesHelpCommand(parser),
# HermesVersionCommand(parser),
# HermesInitCommand(parser),
# HermesCleanCommand(parser),
HermesHarvestCommand(parser),
HermesProcessCommand(parser),
HermesCurateCommand(parser),
HermesDepositCommand(parser),
HermesPostprocessCommand(parser),
# HermesProcessCommand(parser),
# HermesCurateCommand(parser),
# HermesDepositCommand(parser),
# HermesPostprocessCommand(parser),
):
if command.settings_class is not None:
setting_types[command.command_name] = command.settings_class
Expand Down
44 changes: 21 additions & 23 deletions src/hermes/commands/harvest/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,13 @@
# SPDX-FileContributor: Michael Meinel

import argparse
import typing as t
from datetime import datetime

from pydantic import BaseModel

from hermes.commands.base import HermesCommand, HermesPlugin
from hermes.model.context import HermesContext, HermesHarvestContext
from hermes.model.error import HermesValidationError, HermesMergeError
from hermes.model.context_manager import HermesContext
from hermes.model.error import HermesValidationError
from hermes.model import SoftwareMetadata


class HermesHarvestPlugin(HermesPlugin):
Expand All @@ -21,11 +20,11 @@ class HermesHarvestPlugin(HermesPlugin):
TODO: describe the harvesting process and how this is mapped to this plugin.
"""

def __call__(self, command: HermesCommand) -> t.Tuple[t.Dict, t.Dict]:
def __call__(self, command: HermesCommand) -> tuple[SoftwareMetadata, dict]:
pass


class _HarvestSettings(BaseModel):
class HarvestSettings(BaseModel):
"""Generic harvesting settings."""

sources: list[str] = []
Expand All @@ -35,32 +34,31 @@ class HermesHarvestCommand(HermesCommand):
""" Harvest metadata from configured sources. """

command_name = "harvest"
settings_class = _HarvestSettings
settings_class = HarvestSettings

def __call__(self, args: argparse.Namespace) -> None:
self.args = args
ctx = HermesContext()

# Initialize the harvest cache directory here to indicate the step ran
ctx.init_cache("harvest")
ctx = HermesContext()
ctx.prepare_step('harvest')

for plugin_name in self.settings.sources:
plugin_cls = self.plugins[plugin_name]

try:
plugin_func = self.plugins[plugin_name]()
harvested_data, tags = plugin_func(self)

with HermesHarvestContext(ctx, plugin_name) as harvest_ctx:
harvest_ctx.update_from(harvested_data,
plugin=plugin_name,
timestamp=datetime.now().isoformat(), **tags)
for _key, ((_value, _tag), *_trace) in harvest_ctx._data.items():
if any(v != _value and t == _tag for v, t in _trace):
raise HermesMergeError(_key, None, _value)

except KeyError as e:
self.log.error("Plugin '%s' not found.", plugin_name)
self.errors.append(e)
# Load plugin and run the harvester
plugin_func = plugin_cls()
harvested_data = plugin_func(self)

with ctx[plugin_name] as plugin_ctx:
plugin_ctx["codemeta"] = harvested_data[0].compact()
plugin_ctx["context"] = {"@context": harvested_data[0].full_context}

plugin_ctx["expanded"] = harvested_data[0].ld_value

except HermesValidationError as e:
self.log.error("Error while executing %s: %s", plugin_name, e)
self.errors.append(e)

ctx.finalize_step('harvest')
33 changes: 17 additions & 16 deletions src/hermes/commands/harvest/cff.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,16 @@
import logging
import pathlib
import urllib.request
import typing as t

from pydantic import BaseModel
from ruamel.yaml import YAML
import jsonschema
from cffconvert import Citation
from typing import Any, Union

from hermes.model.context import ContextPath
from hermes.model.errors import HermesValidationError
from hermes.model.error import HermesValidationError
from hermes.commands.harvest.base import HermesHarvestPlugin, HermesHarvestCommand
from hermes.model import SoftwareMetadata


# TODO: should this be configurable via a CLI option?
Expand All @@ -35,7 +35,7 @@ class CffHarvestSettings(BaseModel):
class CffHarvestPlugin(HermesHarvestPlugin):
settings_class = CffHarvestSettings

def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]:
def __call__(self, command: HermesHarvestCommand) -> tuple[SoftwareMetadata, dict]:
# Get source files
cff_file = self._get_single_cff(command.args.path)
if not cff_file:
Expand All @@ -44,23 +44,24 @@ def __call__(self, command: HermesHarvestCommand) -> t.Tuple[t.Dict, t.Dict]:

# Read the content
cff_data = cff_file.read_text()

# Validate the content to be correct CFF
cff_dict = self._load_cff_from_file(cff_data)

if command.settings.cff.enable_validation and not self._validate(cff_file, cff_dict):
raise HermesValidationError(cff_file)
if command.settings.cff.enable_validation:
# Validate the content to be correct CFF
if not self._validate(cff_file, cff_dict):
raise HermesValidationError(cff_file)

# Convert to CodeMeta using cffconvert
codemeta_dict = self._convert_cff_to_codemeta(cff_data)
# TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309
codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict)
if "version" in codemeta_dict:
codemeta_dict["version"] = str(codemeta_dict["version"]) # Convert Version to string

return codemeta_dict, {'local_path': str(cff_file)}
# TODO Replace the following temp patch for #112 once there is a new cffconvert version with cffconvert#309
codemeta_dict = self._patch_author_emails(cff_dict, codemeta_dict)
ld_codemeta = SoftwareMetadata(codemeta_dict, extra_vocabs={'legalName': {'@id': "http://schema.org/name"}})
return ld_codemeta, {}

def _load_cff_from_file(self, cff_data: str) -> t.Any:
def _load_cff_from_file(self, cff_data: str) -> Any:
yaml = YAML(typ='safe')
yaml.constructor.yaml_constructors[u'tag:yaml.org,2002:timestamp'] = yaml.constructor.yaml_constructors[
u'tag:yaml.org,2002:str']
Expand All @@ -73,11 +74,11 @@ def _patch_author_emails(self, cff: dict, codemeta: dict) -> dict:
codemeta["author"][i]["email"] = author["email"]
return codemeta

def _convert_cff_to_codemeta(self, cff_data: str) -> t.Any:
def _convert_cff_to_codemeta(self, cff_data: str) -> Any:
codemeta_str = Citation(cff_data).as_codemeta()
return json.loads(codemeta_str)

def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool:
def _validate(self, cff_file: pathlib.Path, cff_dict: dict) -> bool:
audit_log = logging.getLogger('audit.cff')

cff_schema_url = f'https://citation-file-format.github.io/{_CFF_VERSION}/schema.json'
Expand All @@ -93,7 +94,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool:
audit_log.warning('!!! warning "%s is not valid according to <%s>"', cff_file, cff_schema_url)

for error in errors:
path = ContextPath.make(error.absolute_path or ['root'])
path = error.absolute_path or ['root']
audit_log.info(' Invalid input for `%s`.', str(path))
audit_log.info(' !!! message "%s"', error.message)
audit_log.debug(' !!! value "%s"', error.instance)
Expand All @@ -108,7 +109,7 @@ def _validate(self, cff_file: pathlib.Path, cff_dict: t.Dict) -> bool:
audit_log.info('- Found valid Citation File Format file at: %s', cff_file)
return True

def _get_single_cff(self, path: pathlib.Path) -> t.Optional[pathlib.Path]:
def _get_single_cff(self, path: pathlib.Path) -> Union[pathlib.Path, None]:
# Find CFF files in directories and subdirectories
cff_file = path / 'CITATION.cff'
if cff_file.exists():
Expand Down
Loading
Loading