IAMconsortium · dc-almeida · Mar 2, 2026 · Mar 5, 2026 · Mar 6, 2026 · Mar 9, 2026
diff --git a/docs/api/countries.rst b/docs/api/countries.rst
@@ -6,7 +6,7 @@ A common list of countries
 ==========================
 
 Having an agreed list of country names including a mapping to alpha-3 and alpha-2 codes
-(also know as ISO3 and ISO2 codes) is an important prerequisite for scenario analysis
+(also known as ISO3 and ISO2 codes) is an important prerequisite for scenario analysis
 and model comparison.
 
 The :class:`nomenclature` package builds on the :class:`pycountry` package

diff --git a/docs/api/nuts.rst b/docs/api/nuts.rst
@@ -23,8 +23,78 @@ The full list of NUTS regions is accessible via the Eurostat website (`xlsx, 500
 
   from nomenclature import nuts
 
-  # list of NUTS region codes
-  nuts.codes
+  # Access NUTS region information
+  nuts.codes       # List of all NUTS codes
+  nuts.names       # List of all NUTS region names
 
-  # list of NUTS region names
-  nuts.names
+  # Query specific NUTS levels
+  nuts.get(level=3)            # Get all NUTS3 regions
+
+  # Query by country
+  nuts.get(country_code="AT")  # Get all NUTS regions in Austria
+
+.. currentmodule:: nomenclature.processor.nuts
+
+**NutsProcessor**
+=================
+
+The :class:`NutsProcessor` class provides automated aggregation of scenario data
+across NUTS regions. It performs hierarchical aggregation in the following order:
+
+1. NUTS3 → NUTS2
+2. NUTS2 → NUTS1
+3. NUTS1 → Country
+4. Country → European Union (if ≥ 23 of the 27 EU member states are present)
+5. Country + UK → European Union and United Kingdom (if the United Kingdom is also present)
+
+The EU-level aggregations (steps 4-5) are only performed if the corresponding
+target regions (``European Union`` and ``European Union and United Kingdom``) are
+defined in the project's region codelist. If fewer than 23 EU member states are
+present in the data, the EU aggregation is skipped silently.
+
+The processor ensures that regional data is consistently aggregated and validated
+according to the configured NUTS regions and variable code lists.
+
+Consider the example below for configuring a project using NUTS aggregation.
+The *nomenclature.yaml* in the project directory is as follows:
+
+.. code:: yaml
+
+  dimensions:
+    - region
+    - variable
+  definitions:
+    region:
+      nuts:
+        nuts-3: [ AT ]
+      country: true
+  processors:
+    nuts: [ Model A ]
+
+With this configuration, calling :func:`nomenclature.process` will automatically
+instantiate and apply the :class:`NutsProcessor`.
+
+.. code:: python
+
+  import pyam
+  from nomenclature import DataStructureDefinition, process
+
+  df = pyam.IamDataFrame(data="path/to/file.csv")
+  dsd = DataStructureDefinition("definitions")
+  aggregated_data = process(df, dsd)
+
+The data is aggregated for the applicable variables, creating the common region
+``Austria`` (AT) from its constituent NUTS subregions.
+The country-level regions must be defined in a region definition file or by setting
+*definitions.region.country* to *true* in the configuration file
+(see :ref:`adding-countries`).
+
+.. note::
+
+   Only models listed under ``processors.nuts`` in *nomenclature.yaml* are processed
+   by :class:`NutsProcessor`. Data for other models is passed through unchanged.
+   If a NUTS region appears in the data for a listed model but the corresponding
+   country is missing from ``definitions.region.nuts``, a ``ValueError`` is raised.
+
+.. autoclass:: NutsProcessor
+   :members: from_definition, apply
diff --git a/docs/user_guide/config.rst b/docs/user_guide/config.rst
@@ -114,6 +114,8 @@ the nomenclature package will add all countries to the *region* codelist.
 
 More details on the list of countries can be found here: :ref:`countries`.
 
+.. _adding-countries:
+
 Adding NUTS to the region codelist
 ----------------------------------
 
@@ -174,3 +176,42 @@ the filtering for definitions.
 
 The above example retrieves only the model mapping for *MESSAGEix-GLOBIOM 2.1-M-R12*
 from the common-definitions repository.
+
+Configuring processors
+----------------------
+
+The ``processors`` section of *nomenclature.yaml* allows processors to be declared
+directly in the configuration file, so they are applied automatically when calling
+:func:`process` without passing an explicit ``processor`` argument.
+
+Region processor
+^^^^^^^^^^^^^^^^
+
+Setting *processors.region-processor* to *true* will automatically create a
+:class:`RegionProcessor` from the project's default ``mappings/`` directory:
+
+.. code:: yaml
+
+  processors:
+    region-processor: true
+
+This is equivalent to calling:
+
+.. code:: python
+
+  from nomenclature.processor import RegionProcessor
+  processor = RegionProcessor.from_directory("mappings", dsd)
+
+NUTS processor
+^^^^^^^^^^^^^^
+
+Setting *processors.nuts* to a list of model names will automatically create a
+:class:`NutsProcessor` and apply NUTS hierarchical aggregation (NUTS3 → NUTS2 →
+NUTS1 → Country → EU27) for those models:
+
+.. code:: yaml
+
+  processors:
+    nuts: [ Model A, Model B ]
+
+More details on NUTS aggregation can be found here: :ref:`nuts`.
diff --git a/nomenclature/__init__.py b/nomenclature/__init__.py
@@ -14,6 +14,7 @@
 from nomenclature.nuts import nuts  # noqa
 from nomenclature.processor import (  # noqa
     RegionAggregationMapping,  # noqa
+    NutsProcessor,
     RegionProcessor,
     RequiredDataValidator,
 )

diff --git a/nomenclature/config.py b/nomenclature/config.py
@@ -27,12 +27,24 @@
 
 
 class CodeListFromRepository(BaseModel):
+    """
+    Configuration for a codelist from an external repository.
+
+    The `include` and `exclude` filters allow selecting which definitions to import.
+    """
+
     name: str
     include: list[dict[str, Any]] = [{"name": "*"}]
     exclude: list[dict[str, Any]] = Field(default_factory=list)
 
 
 class CodeListConfig(BaseModel):
+    """Configuration for a dimension's codelist.
+
+    This class lists external repositories for codelists, importing definitions
+    from remote sources.
+    """
+
     dimension: str | None = None
     repositories: list[CodeListFromRepository] = Field(
         default_factory=list, alias="repository"
@@ -60,6 +72,12 @@ def repository_dimension_path(self) -> str:
 
 
 class RegionCodeListConfig(CodeListConfig):
+    """
+    Configuration for a region's codelist.
+
+    This class allows importing the definitions for ISO3 countries and NUTS regions.
+    """
+
     country: bool = False
     nuts: dict[str, str | list[str] | bool] | None = None
 
@@ -77,11 +95,12 @@ def check_nuts(
 
 
 class Repository(BaseModel):
+    """Configuration for an external codelist repository."""
+
     url: str
     hash: str | None = None
     release: str | None = None
     local_path: Path | None = Field(default=None, validate_default=True)
-    # defined via the `repository` name in the configuration
 
     @model_validator(mode="after")
     @classmethod
@@ -150,13 +169,16 @@ def check_external_repo_double_stacking(self):
 
 
 class DataStructureConfig(BaseModel):
-    """A class for configuration of a DataStructureDefinition
+    """
+    Configuration class for the data structure definition.
 
-    Attributes
-    ----------
-    region : RegionCodeListConfig
-        Attributes for configuring the RegionCodeList
+    This class defines the configuration for the main IAMC dimensions:
+    - model
+    - scenario
+    - region
+    - variable
 
+    Each dimension can be configured with its own code list and repository sources.
     """
 
     model: CodeListConfig = Field(default_factory=CodeListConfig)
@@ -179,6 +201,8 @@ def repos(self) -> dict[str, str]:
 
 
 class MappingRepository(BaseModel):
+    """Configuration for a mapping repository."""
+
     name: str
     include: list[str] = ["*"]
 
@@ -196,6 +220,8 @@ def match_models(self, models: list[str]) -> list[str]:
 
 
 class RegionMappingConfig(BaseModel):
+    """Configuration for region mapping/aggregation external repositories."""
+
     repositories: list[MappingRepository] = Field(
         default_factory=list, alias="repository"
     )
@@ -217,7 +243,20 @@ def convert_to_set_of_repos(cls, v):
         return v
 
 
+class ProcessorConfig(BaseModel):
+    """Configuration for processors declared in the ``processors`` section."""
+
+    nuts: list[str] | None = None
+    region_processor: bool = Field(False, alias="region-processor")
+
+    model_config = ConfigDict(
+        validate_by_name=True, validate_by_alias=True, extra="forbid"
+    )
+
+
 class TimeDomainConfig(BaseModel):
+    """Configuration for time domain validation settings."""
+
     year_allowed: bool = Field(default=True, alias="year")
     datetime_allowed: bool = Field(default=False, alias="datetime")
     timezone: str | None = Field(
@@ -305,6 +344,9 @@ class NomenclatureConfig(BaseModel):
     repositories: dict[str, Repository] = Field(default_factory=dict)
     definitions: DataStructureConfig = Field(default_factory=DataStructureConfig)
     mappings: RegionMappingConfig = Field(default_factory=RegionMappingConfig)
+    processor: ProcessorConfig = Field(
+        default_factory=ProcessorConfig, alias="processors"
+    )
     illegal_characters: list[str] = Field(
         default=[":", ";", '"'], alias="illegal-characters"
     )
@@ -326,6 +368,7 @@ def check_illegal_chars(cls, v: str | list[str]) -> list[str]:
     def check_definitions_repository(
         cls, v: "NomenclatureConfig"
     ) -> "NomenclatureConfig":
+        """Check that all repositories referenced in definitions and mappings exist."""
         mapping_repos = {"mappings": v.mappings.repositories} if v.mappings else {}
         repos: dict[str, list[MappingRepository]] = {
             **v.definitions.repos,
@@ -337,6 +380,15 @@ def check_definitions_repository(
                 raise ValueError((f"Unknown repository {unknown_repos} in '{use}'."))
         return v
 
+    @model_validator(mode="after")
+    @classmethod
+    def check_nuts_consistency(cls, v: "NomenclatureConfig") -> "NomenclatureConfig":
+        if v.processor and v.processor.nuts and not v.definitions.region.nuts:
+            raise ValueError(
+                "`nuts` region processor set but no NUTS regions in `definitions`."
+            )
+        return v
+
     def fetch_repos(self, target_folder: Path):
         for repo_name, repo in self.repositories.items():
             repo.fetch_repo(target_folder / repo_name)

diff --git a/nomenclature/core.py b/nomenclature/core.py
@@ -5,6 +5,7 @@
 
 from nomenclature.definition import DataStructureDefinition
 from nomenclature.processor import Processor, RegionProcessor
+from nomenclature.processor.nuts import NutsProcessor
 
 logger = logging.getLogger(__name__)
 
@@ -21,11 +22,13 @@ def process(
     This function is the recommended way of using the nomenclature package. It performs
     the following operations:
 
-    * Validation against the codelists and criteria of a DataStructureDefinition
-    * Region-processing, which can consist of three parts:
-        1. Model native regions not listed in the model mapping will be dropped
-        2. Model native regions can be renamed
-        3. Aggregation from model native regions to "common regions"
+    * Validation against the codelists and criteria of a :class:`DataStructureDefinition`
+    * Region processing, which can occur via one or more :class:`Processor` instances. This can be:
+        * Region aggregation (via :class:`RegionProcessor`), which renames and aggregates based on user-provided mappings.
+            1. Model native regions not listed in the model mapping will be dropped
+            2. Model native regions can be renamed
+            3. Aggregation from model native regions to "common regions"
+        * NUTS aggregation (via :class:`NutsProcessor`), which aggregates NUTS3 -> NUTS2 -> NUTS1 -> Country -> EU27(+UK)
     * Validation of consistency across the variable hierarchy
 
     Parameters
@@ -36,9 +39,9 @@ def process(
         Codelists that are used for validation.
     dimensions : list, optional
         Dimensions to be used in the validation, defaults to all dimensions defined in
-        `dsd`
-    processor : :class:`RegionProcessor`, optional
-        Region processor to perform region renaming and aggregation (if given)
+        ``dsd``.
+    processor : :class:`Processor` or list of :class:`Processor`, optional
+        One or more processors to apply; these run before any config-declared ones.
 
     Returns
     -------
@@ -56,8 +59,30 @@ def process(
 
     dimensions = dimensions or dsd.dimensions
 
+    # Auto-instantiate processors declared in nomenclature.yaml under 'processors'
+    # Explicit processors take precedence; config-based ones are appended after.
+    if dsd.config.processor.region_processor:
+        if any(isinstance(p, RegionProcessor) for p in processor):
+            logger.info(
+                "Config declares 'region-processor: true' but an explicit "
+                "RegionProcessor was provided -- skipping config-defined processor."
+            )
+        else:
+            processor = processor + [
+                RegionProcessor.from_directory(dsd.project_folder / "mappings", dsd)
+            ]
+
+    if dsd.config.processor.nuts is not None:
+        if any(isinstance(p, NutsProcessor) for p in processor):
+            logger.info(
+                "Config declares 'nuts' processor but an explicit NutsProcessor "
+                "was provided -- skipping config-defined processor."
+            )
+        else:
+            processor = processor + [NutsProcessor.from_definition(dsd)]
+
     if (
-        any(isinstance(p, RegionProcessor) for p in processor)
+        any(isinstance(p, (RegionProcessor, NutsProcessor)) for p in processor)
         and "region" in dimensions
     ):
         dimensions.remove("region")

diff --git a/nomenclature/processor/__init__.py b/nomenclature/processor/__init__.py
@@ -3,6 +3,7 @@
     RegionAggregationMapping,
     RegionProcessor,
 )
+from nomenclature.processor.nuts import NutsProcessor  # noqa
 from nomenclature.processor.required_data import RequiredDataValidator  # noqa
 from nomenclature.processor.data_validator import DataValidator  # noqa
 from nomenclature.processor.aggregator import Aggregator  # noqa