oumi-ai · idoudali · Mar 25, 2026 · Mar 28, 2026 · oelachqar · Apr 14, 2026
diff --git a/src/oumi/cli/main.py b/src/oumi/cli/main.py
@@ -57,6 +57,7 @@
 from oumi.cli.synth import synth
 from oumi.cli.train import train
 from oumi.cli.tune import tune
+from oumi.exceptions import OumiConfigError
 from oumi.utils.logging import should_use_rich_logging
 
 _ASCII_LOGO = r"""
@@ -365,6 +366,9 @@ def run():
             telemetry = TelemetryManager.get_instance()
             with telemetry.capture_operation(event_name, event_properties):
                 return app()
+    except OumiConfigError as e:
+        CONSOLE.print(f"Error: {e}", style="red", markup=False)
+        sys.exit(1)
     except Exception as e:
         tb_str = traceback.format_exc()
         CONSOLE.print(tb_str)

diff --git a/src/oumi/core/configs/__init__.py b/src/oumi/core/configs/__init__.py
@@ -158,12 +158,13 @@
 from oumi.core.configs.synthesis_config import SynthesisConfig
 from oumi.core.configs.training_config import TrainingConfig
 from oumi.core.configs.tuning_config import TuningConfig
+from oumi.exceptions import OumiConfigError, OumiConfigFileNotFoundError
 
 __all__ = [
     "AsyncEvaluationConfig",
     "AutoWrapPolicy",
     "BackwardPrefetch",
     "BaseConfig",
     "DataParams",
     "DatasetParams",
     "DatasetSplit",
@@ -192,6 +193,8 @@
     "MixedPrecisionDtype",
     "MixtureStrategy",
     "ModelParams",
+    "OumiConfigError",
+    "OumiConfigFileNotFoundError",
     "PeftParams",
     "PeftSaveMode",
     "ProfilerParams",

diff --git a/src/oumi/core/configs/analyze_config.py b/src/oumi/core/configs/analyze_config.py
@@ -21,6 +21,7 @@
 
 from oumi.core.configs.base_config import BaseConfig
 from oumi.core.configs.params.base_params import BaseParams
+from oumi.exceptions import OumiConfigValueError
 
 
 class DatasetSource(Enum):
@@ -167,14 +168,16 @@ def __post_init__(self):
 
         # Validate sample_count
         if self.sample_count is not None and self.sample_count <= 0:
-            raise ValueError("`sample_count` must be greater than 0.")
+            raise OumiConfigValueError("`sample_count` must be greater than 0.")
 
         # Validate analyzer configurations
         analyzer_ids = set()
         for analyzer in self.analyzers:
             # Validate analyzer ID
             if not analyzer.id:
-                raise ValueError("Analyzer 'id' must be provided")
+                raise OumiConfigValueError("Analyzer 'id' must be provided")
             if analyzer.id in analyzer_ids:
-                raise ValueError(f"Duplicate analyzer ID found: '{analyzer.id}'")
+                raise OumiConfigValueError(
+                    f"Duplicate analyzer ID found: '{analyzer.id}'"
+                )
             analyzer_ids.add(analyzer.id)
diff --git a/src/oumi/core/configs/async_evaluation_config.py b/src/oumi/core/configs/async_evaluation_config.py
@@ -18,6 +18,7 @@
 
 from oumi.core.configs.base_config import BaseConfig
 from oumi.core.configs.evaluation_config import EvaluationConfig
+from oumi.exceptions import OumiConfigValueError
 
 
 @dataclass
@@ -48,6 +49,6 @@ class AsyncEvaluationConfig(BaseConfig):
     def __post_init__(self):
         """Verifies/populates params."""
         if self.polling_interval < 0:
-            raise ValueError("`polling_interval` must be non-negative.")
+            raise OumiConfigValueError("`polling_interval` must be non-negative.")
         if self.num_retries < 0:
-            raise ValueError("`num_retries` must be non-negative.")
+            raise OumiConfigValueError("`num_retries` must be non-negative.")
diff --git a/src/oumi/core/configs/base_config.py b/src/oumi/core/configs/base_config.py
@@ -25,6 +25,7 @@
 from omegaconf import OmegaConf
 
 from oumi.core.configs.params.base_params import BaseParams
+from oumi.exceptions import OumiConfigError, OumiConfigFileNotFoundError
 
 T = TypeVar("T", bound="BaseConfig")
 
@@ -128,6 +129,10 @@ def _read_config_without_interpolation(config_path: str) -> str:
     Returns:
         str: The stringified configuration.
     """
+    if not Path(config_path).is_file():
+        raise OumiConfigFileNotFoundError(
+            f"Config file not found or path is not a file: {config_path}"
+        )
     with open(config_path) as f:
         stringified_config = f.read()
         pattern = r"(?<!\\)\$\{"  # Matches "${" but not "\${"
@@ -165,7 +170,11 @@ def to_yaml(self, config_path: str | Path | StringIO) -> None:
                 + "\n".join(f"- {path}" for path in sorted(removed_paths))
             )
 
-        OmegaConf.save(config=processed_config, f=config_path)
+        try:
+            OmegaConf.save(config=processed_config, f=config_path)
+        except OSError as e:
+            # Any OSError from the save (e.g. a missing parent folder) is re-raised.
+            raise OumiConfigError(f"Failed to save config to {config_path}: {e}") from e
 
     @classmethod
     def from_yaml(
@@ -181,6 +190,10 @@ def from_yaml(
         Returns:
             BaseConfig: The merged configuration object.
         """
+        if not Path(config_path).is_file():
+            raise OumiConfigFileNotFoundError(
+                f"Config file not found or path is not a file: {config_path}"
+            )
         schema = OmegaConf.structured(cls)
         if ignore_interpolation:
             stringified_config = _read_config_without_interpolation(str(config_path))
@@ -317,8 +330,8 @@ def __finalize_and_validate__(self) -> None:
         This method can be overridden by subclasses to implement custom
         validation logic.
 
-        In case of validation errors, this method should raise a `ValueError`
-        or other appropriate exception.
+        In case of validation errors, this method should raise
+        `OumiConfigValueError` or another appropriate exception.
         """
 
     def __iter__(self) -> Iterator[tuple[str, Any]]:

diff --git a/src/oumi/core/types/exceptions.py → src/oumi/core/configs/exceptions.py b/src/oumi/core/types/exceptions.py → src/oumi/core/configs/exceptions.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+"""Backward-compatible re-exports. Canonical definitions live in oumi.exceptions."""
 
-class HardwareException(Exception):
-    """An exception thrown for invalid hardware configurations."""
+from oumi.exceptions import OumiConfigError, OumiConfigFileNotFoundError
+
+__all__ = ["OumiConfigFileNotFoundError", "OumiConfigError"]
diff --git a/src/oumi/core/configs/judge_config.py b/src/oumi/core/configs/judge_config.py
@@ -25,6 +25,7 @@
 from oumi.core.configs.inference_config import InferenceConfig
 from oumi.core.configs.params.judge_params import JudgeParams
 from oumi.core.configs.params.rule_judge_params import RuleJudgeParams
+from oumi.exceptions import OumiConfigValueError
 
 JUDGE_CONFIG_REPO_PATH_TEMPLATE = "oumi://configs/projects/judges/{path}.yaml"
 
@@ -109,14 +110,14 @@ def _resolve_path(unresolved_path: str) -> str | None:
             try:
                 return cls.from_yaml_and_arg_list(resolved_path, extra_args)
             except Exception as e:
-                raise ValueError(
+                raise OumiConfigValueError(
                     f"Failed to parse {resolved_path} as JudgeConfig. "
                     f"Please ensure the YAML file contains both 'judge_params' and "
                     f"'inference_config' sections with valid fields. "
                     f"Original error: {e}"
                 ) from e
         else:
-            raise ValueError(
+            raise OumiConfigValueError(
                 f"Could not resolve JudgeConfig from path: {path}. "
                 "Please provide a valid local or GitHub repo path."
             )
diff --git a/src/oumi/core/configs/params/base_params.py b/src/oumi/core/configs/params/base_params.py
@@ -42,8 +42,8 @@ def __finalize_and_validate__(self) -> None:
         This method can be overridden by subclasses to implement custom
         validation logic.
 
-        In case of validation errors, this method should raise a `ValueError`
-        or other appropriate exception.
+        In case of validation errors, this method should raise
+        `OumiConfigValueError` or another appropriate exception.
         """
 
     def __iter__(self) -> Iterator[tuple[str, Any]]:

diff --git a/src/oumi/core/configs/params/data_params.py b/src/oumi/core/configs/params/data_params.py
@@ -21,6 +21,7 @@
 from omegaconf import MISSING
 
 from oumi.core.configs.params.base_params import BaseParams
+from oumi.exceptions import OumiConfigValueError
 
 
 # Training Params
@@ -49,7 +50,7 @@ def get_literal_value(self) -> Literal["first_exhausted", "all_exhausted"]:
         elif self.value == MixtureStrategy.ALL_EXHAUSTED:
             return "all_exhausted"
         else:
-            raise ValueError("Unsupported value for MixtureStrategy")
+            raise OumiConfigValueError("Unsupported value for MixtureStrategy")
 
 
 @dataclass
@@ -149,24 +150,28 @@ def __post_init__(self):
         """Verifies params."""
         if self.sample_count is not None:
             if self.sample_count < 0:
-                raise ValueError("`sample_count` must be greater than 0.")
+                raise OumiConfigValueError("`sample_count` must be non-negative.")
         if self.mixture_proportion is not None:
             if self.mixture_proportion < 0:
-                raise ValueError("`mixture_proportion` must be greater than 0.")
+                raise OumiConfigValueError(
+                    "`mixture_proportion` must be non-negative."
+                )
             if self.mixture_proportion > 1:
-                raise ValueError("`mixture_proportion` must not be greater than 1.0 .")
+                raise OumiConfigValueError(
+                    "`mixture_proportion` must not be greater than 1.0 ."
+                )
 
         if self.transform_num_workers is not None:
             if isinstance(self.transform_num_workers, str):
                 if not (self.transform_num_workers == "auto"):
-                    raise ValueError(
+                    raise OumiConfigValueError(
                         "Unknown value of transform_num_workers: "
                         f"{self.transform_num_workers}. Must be 'auto' if string."
                     )
             elif (not isinstance(self.transform_num_workers, int)) or (
                 self.transform_num_workers <= 0
             ):
-                raise ValueError(
+                raise OumiConfigValueError(
                     "Non-positive value of transform_num_workers: "
                     f"{self.transform_num_workers}."
                 )
@@ -176,7 +181,7 @@ def __post_init__(self):
                 self.dataset_kwargs.keys()
             )
             if len(conflicting_keys) > 0:
-                raise ValueError(
+                raise OumiConfigValueError(
                     "dataset_kwargs attempts to override the following "
                     f"reserved fields: {conflicting_keys}. "
                     "Use properties of DatasetParams instead."
@@ -270,23 +275,23 @@ def __post_init__(self):
             if not all(
                 [dataset.mixture_proportion is not None for dataset in self.datasets]
             ):
-                raise ValueError(
+                raise OumiConfigValueError(
                     "If `mixture_proportion` is specified it must be "
                     " specified for all datasets"
                 )
             mix_sum = sum(
                 filter(None, [dataset.mixture_proportion for dataset in self.datasets])
             )
             if not self._is_sum_normalized(mix_sum):
-                raise ValueError(
+                raise OumiConfigValueError(
                     "The sum of `mixture_proportion` must be 1.0. "
                     f"The current sum is {mix_sum} ."
                 )
         if (
             self.mixture_strategy != MixtureStrategy.ALL_EXHAUSTED
             and self.mixture_strategy != MixtureStrategy.FIRST_EXHAUSTED
         ):
-            raise ValueError(
+            raise OumiConfigValueError(
                 "`mixture_strategy` must be one of "
                 f'["{MixtureStrategy.FIRST_EXHAUSTED.value}", '
                 f'"{MixtureStrategy.ALL_EXHAUSTED.value}"].'
@@ -324,12 +329,12 @@ def get_split(self, split: DatasetSplit) -> DatasetSplitParams:
         elif split == DatasetSplit.VALIDATION:
             return self.validation
         else:
-            raise ValueError(f"Received invalid split: {split}.")
+            raise OumiConfigValueError(f"Received invalid split: {split}.")
 
     def __finalize_and_validate__(self):
         """Verifies params."""
         if len(self.train.datasets) == 0:
-            raise ValueError("At least one training dataset is required.")
+            raise OumiConfigValueError("At least one training dataset is required.")
 
         all_collators = set()
         if self.train.collator_name:
@@ -339,11 +344,11 @@ def __finalize_and_validate__(self):
         if self.test.collator_name:
             all_collators.add(self.test.collator_name)
         if len(all_collators) >= 2:
-            raise ValueError(
+            raise OumiConfigValueError(
                 f"Different data collators are not supported yet: {all_collators}"
             )
         elif len(all_collators) == 1 and not self.train.collator_name:
-            raise ValueError(
+            raise OumiConfigValueError(
                 "Data collator must be also specified "
                 f"on the `train` split: {all_collators}"
             )
diff --git a/src/oumi/core/configs/params/deepspeed_params.py b/src/oumi/core/configs/params/deepspeed_params.py
@@ -18,6 +18,7 @@
 from typing import Any
 
 from oumi.core.configs.params.base_params import BaseParams
+from oumi.exceptions import OumiConfigValueError
 
 
 class ZeRORuntimeStage(str, Enum):
@@ -287,7 +288,7 @@ def __post_init__(self) -> None:
             self.offload_param is not None
             and self.zero_stage != ZeRORuntimeStage.ZERO_3
         ):
-            raise ValueError(
+            raise OumiConfigValueError(
                 "Parameter offloading is only supported with ZeRO stage 3. "
                 f"Current stage: {self.zero_stage}"
             )
@@ -297,7 +298,7 @@ def __post_init__(self) -> None:
             ZeRORuntimeStage.ZERO_2,
             ZeRORuntimeStage.ZERO_3,
         ]:
-            raise ValueError(
+            raise OumiConfigValueError(
                 "Optimizer offloading requires ZeRO stage 1, 2, or 3. "
                 f"Current stage: {self.zero_stage}"
             )

diff --git a/src/oumi/core/configs/params/evaluation_params.py b/src/oumi/core/configs/params/evaluation_params.py
@@ -17,6 +17,7 @@
 from typing import Any
 
 from oumi.core.configs.params.base_params import BaseParams
+from oumi.exceptions import OumiConfigValueError
 
 
 class EvaluationBackend(Enum):
@@ -108,7 +109,7 @@ def my_evaluation(task_params, config):
     def get_evaluation_backend(self) -> EvaluationBackend:
         """Returns the evaluation backend as an Enum."""
         if not self.evaluation_backend:
-            raise ValueError(
+            raise OumiConfigValueError(
                 "Missing `evaluation_backend`. When running evaluations, it is "
                 "necessary to specify the evaluation backend to use for EACH task. "
                 "The available backends can be found in the following enum: "
@@ -120,7 +121,9 @@ def get_evaluation_backend(self) -> EvaluationBackend:
         elif self.evaluation_backend == EvaluationBackend.CUSTOM.value:
             return EvaluationBackend.CUSTOM
         else:
-            raise ValueError(f"Unknown evaluation backend: {self.evaluation_backend}")
+            raise OumiConfigValueError(
+                f"Unknown evaluation backend: {self.evaluation_backend}"
+            )
 
     @staticmethod
     def list_evaluation_backends() -> str:
@@ -130,7 +133,9 @@ def list_evaluation_backends() -> str:
     def __post_init__(self):
         """Verifies params."""
         if self.num_samples is not None and self.num_samples <= 0:
-            raise ValueError("`num_samples` must be None or a positive integer.")
+            raise OumiConfigValueError(
+                "`num_samples` must be None or a positive integer."
+            )
 
 
 @dataclass
@@ -152,6 +157,6 @@ class LMHarnessTaskParams(EvaluationTaskParams):
     def __post_init__(self):
         """Verifies params."""
         if not self.task_name:
-            raise ValueError("`task_name` must be a valid LM Harness task.")
+            raise OumiConfigValueError("`task_name` must be a valid LM Harness task.")
         if self.num_fewshot and self.num_fewshot < 0:
-            raise ValueError("`num_fewshot` must be non-negative.")
+            raise OumiConfigValueError("`num_fewshot` must be non-negative.")