IAMconsortium · dc-almeida · Mar 2, 2026 · Mar 5, 2026 · Mar 6, 2026 · Mar 9, 2026
diff --git a/docs/api/countries.rst b/docs/api/countries.rst
@@ -6,7 +6,7 @@ A common list of countries
 ==========================
 
 Having an agreed list of country names including a mapping to alpha-3 and alpha-2 codes
-(also know as ISO3 and ISO2 codes) is an important prerequisite for scenario analysis
+(also known as ISO3 and ISO2 codes) is an important prerequisite for scenario analysis
 and model comparison.
 
 The :class:`nomenclature` package builds on the :class:`pycountry` package

diff --git a/docs/api/nuts.rst b/docs/api/nuts.rst
@@ -23,8 +23,92 @@ The full list of NUTS regions is accessible via the Eurostat website (`xlsx, 500
 
   from nomenclature import nuts
 
-  # list of NUTS region codes
-  nuts.codes
+  # Access NUTS region information
+  nuts.codes       # List of all NUTS codes
+  nuts.names       # List of all NUTS region names
 
-  # list of NUTS region names
-  nuts.names
+  # Query specific NUTS levels
+  nuts.get(level=3)            # Get all NUTS3 regions
+
+  # Query by country
+  nuts.get(country_code="AT")  # Get all NUTS regions in Austria
+
+.. currentmodule:: nomenclature.processor.nuts
+
+
+**NutsProcessor**
+-----------------
+
+The :class:`NutsProcessor` class provides automated aggregation of scenario data
+across NUTS regions. It performs hierarchical aggregation in the following order:
+
+1. NUTS3 → NUTS2
+2. NUTS2 → NUTS1
+3. NUTS1 → Country
+4. Country → European Union (if ≥ 23 of the 27 EU member states are present)
+5. Country + UK → European Union and United Kingdom (if the United Kingdom is also present)
+
+The EU-level aggregations (steps 4-5) are only performed if the corresponding
+target regions (``European Union`` and ``European Union and United Kingdom``) are
+defined in the project's region codelist. If fewer than 23 EU member states are
+present in the data, the EU aggregation is skipped silently.
+
+The processor ensures that regional data is consistently aggregated and validated
+according to the configured NUTS regions and variable code lists.
+
+Consider the example below for configuring a project using NUTS aggregation.
+The *nomenclature.yaml* in the project directory is as follows:
+
+.. code:: yaml
+
+  dimensions:
+    - region
+    - variable
+  definitions:
+    region:
+      nuts:
+        nuts-1: [ AT ]
+        nuts-2: [ AT ]
+        nuts-3: [ AT ]
+      country: true
+  processors:
+    nuts: [ Model A ]
+
+With this configuration, calling :func:`process` will automatically instantiate
+and apply the :class:`NutsProcessor`.
+
+.. code:: python
+
+  import pyam
+  from nomenclature import DataStructureDefinition, process
+
+  df = pyam.IamDataFrame(data="path/to/file.csv")
+  dsd = DataStructureDefinition("definitions")
+  aggregated_data = process(df, dsd)
+
+The data is aggregated for the applicable variables, creating the common region
+``Austria`` (AT) from its constituent NUTS subregions.
+The country-level regions must be defined in a region definition file or by setting
+*definitions.region.country* as *true* in the configuration file
+(see :ref:`adding-countries`).
+
+.. note::
+
+   Only NUTS regions explicitly listed under ``definitions.region.nuts`` are included
+   in the output. The :class:`NutsProcessor` always aggregates through all levels,
+   but intermediate levels are **dropped** from the result unless they are listed
+   in the configuration. In the example above, all three levels (NUTS1, NUTS2, NUTS3)
+   are listed, so the final output includes the original NUTS3 data as well as
+   the aggregated NUTS2 and NUTS1 regions alongside the country-level result.
+   If only ``nuts-3`` were listed, the aggregated NUTS2 and NUTS1 regions would
+   be discarded and only the NUTS3 regions and the country total would be retained.
+
+.. note::
+
+   Only models listed under ``processors.nuts`` in *nomenclature.yaml* are processed
+   by :class:`NutsProcessor`. Data for other models is passed through unchanged.
+   If a NUTS region appears in the data for a listed model but the corresponding
+   country is missing from ``definitions.region.nuts``, a ``ValueError`` is raised.
+
+.. autoclass:: NutsProcessor
+   :members: from_definition, apply
diff --git a/docs/user_guide/config.rst b/docs/user_guide/config.rst
@@ -114,6 +114,8 @@ the nomenclature package will add all countries to the *region* codelist.
 
 More details on the list of countries can be found here: :ref:`countries`.
 
+.. _adding-countries:
+
 Adding NUTS to the region codelist
 ----------------------------------
 
@@ -174,3 +176,47 @@ the filtering for definitions.
 
 The above example retrieves only the model mapping for *MESSAGEix-GLOBIOM 2.1-M-R12*
 from the common-definitions repository.
+
+Configuring processors
+----------------------
+
+The ``processors`` section of *nomenclature.yaml* allows processors to be declared
+directly in the configuration file, so they are applied automatically when calling
+:func:`process` without passing an explicit ``processor`` argument.
+
+Region processor
+^^^^^^^^^^^^^^^^
+
+Setting *processors.region-processor* as *true* will automatically create a
+:class:`RegionProcessor` from the project's default ``mappings/`` directory:
+
+.. code:: yaml
+
+  processors:
+    region-processor: true
+
+This is equivalent to calling:
+
+.. code:: python
+
+  import pyam
+  from nomenclature import DataStructureDefinition, RegionProcessor, process
+
+  df = pyam.IamDataFrame(data="path/to/file.csv")
+  dsd = DataStructureDefinition("definitions")
+  processor = RegionProcessor.from_directory("mappings", dsd)
+  aggregated_data = process(df, dsd, processor=processor)
+
+NUTS processor
+^^^^^^^^^^^^^^
+
+Setting *processors.nuts* to a list of model names will automatically create a
+:class:`NutsProcessor` and apply NUTS hierarchical aggregation (NUTS3 → NUTS2 →
+NUTS1 → Country → EU27) for those models:
+
+.. code:: yaml
+
+  processors:
+    nuts: [ Model A, Model B ]
+
+More details on NUTS aggregation can be found here: :ref:`nuts`.
diff --git a/nomenclature/__init__.py b/nomenclature/__init__.py
@@ -14,6 +14,7 @@
 from nomenclature.nuts import nuts  # noqa
 from nomenclature.processor import (  # noqa
     RegionAggregationMapping,  # noqa
+    NutsProcessor,
     RegionProcessor,
     RequiredDataValidator,
 )

diff --git a/nomenclature/codelist.py b/nomenclature/codelist.py
@@ -537,8 +537,12 @@ def matches_filter(code, filters, keep):
             def check_attribute_match(code_value, filter_value):
                 # if is list -> recursive
                 # if is str -> escape all special characters except "*" and use a regex
+                # if is bool -> match exactly (must be checked before int since bool
+                #   is a subclass of int)
                 # if is int -> match exactly
                 # if is None -> Attribute does not exist therefore does not match
+                if isinstance(filter_value, bool):
+                    return code_value == filter_value
                 if isinstance(filter_value, int):
                     return code_value == filter_value
                 if isinstance(filter_value, str):
@@ -592,6 +596,17 @@ class VariableCodeList(CodeList):
     unknown_code_error: ClassVar[type[UnknownCodeError]] = UnknownVariableError
 
     _data_validator = None
+    _region_aggregation_variables = None
+
+    @property
+    def region_aggregation_variables(self) -> list[str]:
+        """Variable names where skip_region_aggregation is False, cached on first access."""
+        if self._region_aggregation_variables is not None:
+            return self._region_aggregation_variables
+        self._region_aggregation_variables = [
+            var.name for var in self.mapping.values() if not var.skip_region_aggregation
+        ]
+        return self._region_aggregation_variables
 
     @property
     def data_validator(self):
@@ -812,6 +827,7 @@ def from_directory(
                         RegionCode(
                             name=r.code,
                             hierarchy=f"NUTS {level[-1]} regions (2024 edition)",
+                            extra_attributes={"nuts": True},
                         )
                     )
 
@@ -937,5 +953,4 @@ class MetaCodeList(CodeList):
 
 
 class ScenarioCodeList(CodeList):
-
     unknown_code_error = UnknownScenarioError
diff --git a/nomenclature/config.py b/nomenclature/config.py
@@ -27,12 +27,24 @@
 
 
 class CodeListFromRepository(BaseModel):
+    """
+    Configuration for a codelist from an external repository.
+
+    The `include` and `exclude` filters allow selecting which definitions to import.
+    """
+
     name: str
     include: list[dict[str, Any]] = [{"name": "*"}]
     exclude: list[dict[str, Any]] = Field(default_factory=list)
 
 
 class CodeListConfig(BaseModel):
+    """Configuration for a dimension's codelist.
+
+    This class lists external repositories for codelists, importing definitions
+    from remote sources.
+    """
+
     dimension: str | None = None
     repositories: list[CodeListFromRepository] = Field(
         default_factory=list, alias="repository"
@@ -60,6 +72,13 @@ def repository_dimension_path(self) -> str:
 
 
 class RegionCodeListConfig(CodeListConfig):
+    """
+    Configuration for a region codelist.
+
+    This class allows selecting which regions to import from external repositories
+    and importing the definitions for ISO3 countries and NUTS regions.
+    """
+
     country: bool = False
     nuts: dict[str, str | list[str] | bool] | None = None
 
@@ -77,11 +96,12 @@ def check_nuts(
 
 
 class Repository(BaseModel):
+    """Configuration for an external codelist repository."""
+
     url: str
     hash: str | None = None
     release: str | None = None
     local_path: Path | None = Field(default=None, validate_default=True)
-    # defined via the `repository` name in the configuration
 
     @model_validator(mode="after")
     @classmethod
@@ -150,21 +170,22 @@ def check_external_repo_double_stacking(self):
 
 
 class DataStructureConfig(BaseModel):
-    """A class for configuration of a DataStructureDefinition
+    """
+    Configuration class for the data structure definition.
 
-    Attributes
-    ----------
-    region : RegionCodeListConfig
-        Attributes for configuring the RegionCodeList
+    This class defines the configuration for the main IAMC dimensions:
+    - scenario
+    - region
+    - variable
 
+    Each dimension can be configured with its own code list and repository sources.
     """
 
-    model: CodeListConfig = Field(default_factory=CodeListConfig)
     scenario: CodeListConfig = Field(default_factory=CodeListConfig)
     region: RegionCodeListConfig = Field(default_factory=RegionCodeListConfig)
     variable: CodeListConfig = Field(default_factory=CodeListConfig)
 
-    @field_validator("model", "scenario", "region", "variable", mode="before")
+    @field_validator("scenario", "region", "variable", mode="before")
     @classmethod
     def add_dimension(cls, v, info: ValidationInfo):
         return {"dimension": info.field_name, **v}
@@ -173,12 +194,14 @@ def add_dimension(cls, v, info: ValidationInfo):
     def repos(self) -> dict[str, str]:
         return {
             dimension: getattr(self, dimension).repositories
-            for dimension in ("model", "scenario", "region", "variable")
+            for dimension in ("scenario", "region", "variable")
             if getattr(self, dimension).repositories
         }
 
 
 class MappingRepository(BaseModel):
+    """Configuration for a mapping repository."""
+
     name: str
     include: list[str] = ["*"]
 
@@ -196,6 +219,8 @@ def match_models(self, models: list[str]) -> list[str]:
 
 
 class RegionMappingConfig(BaseModel):
+    """Configuration for region mapping/aggregation external repositories."""
+
     repositories: list[MappingRepository] = Field(
         default_factory=list, alias="repository"
     )
@@ -217,13 +242,25 @@ def convert_to_set_of_repos(cls, v):
         return v
 
 
+class ProcessorConfig(BaseModel):
+    """Configuration for region processor settings."""
+
+    nuts: list[str] = Field(default_factory=list, alias="nuts-processor")
+    region_processor: bool = Field(default=False, alias="region-processor")
+
+    model_config = ConfigDict(
+        validate_by_name=True, validate_by_alias=True, extra="forbid"
+    )
+
+
 class TimeDomainConfig(BaseModel):
+    """Configuration for time domain validation settings."""
+
     year_allowed: bool = Field(default=True, alias="year")
     datetime_allowed: bool = Field(default=False, alias="datetime")
     timezone: str | None = Field(
         default=None,
         pattern=r"^UTC([+-])(1[0-4]|0?[0-9]):([0-5][0-9])$",
-        # pattern_msg="Invalid timezone format. Expected format: 'UTC±HH:MM'."
     )
 
     model_config = ConfigDict(validate_by_name=True, validate_by_alias=True)
@@ -305,6 +342,9 @@ class NomenclatureConfig(BaseModel):
     repositories: dict[str, Repository] = Field(default_factory=dict)
     definitions: DataStructureConfig = Field(default_factory=DataStructureConfig)
     mappings: RegionMappingConfig = Field(default_factory=RegionMappingConfig)
+    processor: ProcessorConfig = Field(
+        default_factory=ProcessorConfig, alias="processors"
+    )
     illegal_characters: list[str] = Field(
         default=[":", ";", '"'], alias="illegal-characters"
     )
@@ -326,6 +366,7 @@ def check_illegal_chars(cls, v: str | list[str]) -> list[str]:
     def check_definitions_repository(
         cls, v: "NomenclatureConfig"
     ) -> "NomenclatureConfig":
+        """Check that all repositories referenced in definitions and mappings exist."""
         mapping_repos = {"mappings": v.mappings.repositories} if v.mappings else {}
         repos: dict[str, list[MappingRepository]] = {
             **v.definitions.repos,
@@ -337,6 +378,16 @@ def check_definitions_repository(
                 raise ValueError((f"Unknown repository {unknown_repos} in '{use}'."))
         return v
 
+    @model_validator(mode="after")
+    @classmethod
+    def check_nuts_consistency(cls, v: "NomenclatureConfig") -> "NomenclatureConfig":
+        if v.processor.nuts and not v.definitions.region.nuts:
+            raise ValueError(
+                "`nuts` region processor set but no NUTS regions in `definitions`. "
+                "To fix, list the NUTS regions under `definitions.region.nuts`."
+            )
+        return v
+
     def fetch_repos(self, target_folder: Path):
         for repo_name, repo in self.repositories.items():
             repo.fetch_repo(target_folder / repo_name)