Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions src/pudl/metadata/fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -1571,6 +1571,21 @@
"description": "Land area in square meters.",
"unit": "square meters",
},
"lng_inventory_volume": {
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We add the units to the end of the column name typically, so these should wind up being lng_facility_volume_mcf, etc.

"type": "number",
"description": (
"Liquefied natural gas (LNG) facility inventory volume "
"at end of year. Reference conditions for measurement are 14.73 psia and 60° Fahrenheit."
),
"unit": "Mcf",
},
"lng_inventory_capacity": {
"type": "number",
"description": (
"Liquefied natural gas (LNG) facility inventory capacity, MMcf per day",
),
"unit": "MMcf",
},
"water_area": {
"type": "number",
"description": "Water area in square meters.",
Expand Down Expand Up @@ -4119,6 +4134,19 @@
"type": "number",
"description": "Longitude of the plant's location, in degrees.",
},
"marine_terminal_facility_volume": {
"type": "number",
"description": (
"Marine terminal facility inventory volume "
"at end of year. Reference conditions for measurement are 14.73 psia and 60° Fahrenheit."
),
"unit": "Mcf",
},
"marine_terminal_facility_capacity": {
"type": "number",
"description": ("Marine terminal facility inventory capacity, MMcf per day",),
"unit": "MMcf",
},
"mains_miles": {
"type": "number",
"description": "The miles of mains distribution pipeline.",
Expand Down Expand Up @@ -8550,8 +8578,10 @@
},
"revenue_class": {
"type": "string",
"description": "Source of revenue: whether revenue originates from gas owned directly by the "
"operator (sales) or gas transported by the operator (transport).",
"description": (
"Source of revenue: whether revenue originates from gas owned directly by the "
"operator (sales) or gas transported by the operator (transport)."
),
"constraints": {"enum": REVENUE_CLASSES_EIA176},
},
},
Expand Down
24 changes: 23 additions & 1 deletion src/pudl/metadata/resources/eia176.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,27 @@
"field_namespace": "eia",
"sources": ["eia176"],
"etl_group": "eia176",
}
},
"core_eia176__yearly_liquefied_natural_gas_inventory": {
"description": {
"additional_summary_text": "LNG storage volume at end of the year",
"additional_source_text": "(Part 5, Lines 8.0-8.2)",
},
"schema": {
"fields": [
"operator_id_eia",
"report_year",
"operating_state",
"lng_inventory_volume",
"lng_facility_volume",
"marine_terminal_facility_volume",
"lng_facility_capacity",
"marine_terminal_facility_capacity",
],
"primary_key": [
"operator_id_eia",
"report_year",
],
},
},
}
86 changes: 75 additions & 11 deletions src/pudl/transform/eia176.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,17 +319,7 @@ def core_eia176__yearly_gas_disposition_by_consumer(

df = _core_eia176__yearly_company_data.filter(primary_key + keep)

# Normalize operating states, those that are missing in subdivisions will be NA
codes = (
core_pudl__codes_subdivisions.assign(
key=lambda d: d["subdivision_name"].str.strip().str.casefold()
)
.drop_duplicates("key")
.set_index("key")["subdivision_code"]
)
df["operating_state"] = (
df["operating_state"].str.strip().str.casefold().map(codes.get)
)
_normalize_operating_states(df, core_pudl__codes_subdivisions)

df = pd.melt(
df, id_vars=primary_key, var_name="metric", value_name="value"
Expand Down Expand Up @@ -359,3 +349,77 @@ def core_eia176__yearly_gas_disposition_by_consumer(
df = df.dropna(subset=["consumers", "revenue", "volume_mcf"], how="all")

return df


@asset(io_manager_key="pudl_io_manager")
def core_eia176__yearly_liquefied_natural_gas_inventory(
    _core_eia176__yearly_company_data: pd.DataFrame,
    core_pudl__codes_subdivisions: pd.DataFrame,
) -> pd.DataFrame:
    """Produce annual information about an operator's LNG storage volume.

    Selects the LNG inventory, LNG facility and marine terminal facility
    volume/capacity columns from the wide company table, drops rows in which
    all of those metrics are missing, normalizes ``operating_state`` (rows
    whose state is NA after normalization are dropped) and shortens the metric
    column names.

    Args:
        _core_eia176__yearly_company_data: Wide company-level EIA-176 data with
            per-metric columns.
        core_pudl__codes_subdivisions: Mapping from ``subdivision_name`` to
            ``subdivision_code`` used to normalize ``operating_state``.

    Returns:
        A DataFrame with at most one row per ``operator_id_eia`` and
        ``report_year``, holding ``operating_state`` and the renamed LNG
        volume and capacity columns.

    Raises:
        AssertionError: If the selected data contains more than one row for
            the same ``operator_id_eia`` / ``report_year`` pair.
    """
    primary_key = ["operator_id_eia", "report_year"]
    other = ["operating_state"]

    # Raw EIA-176 column name -> output column name. The insertion order of
    # this mapping also determines the order of the metric columns.
    renames = {
        "lng_inventory_at_end_of_year_volume": "lng_inventory_volume",
        "lng_facility_year_end_volume": "lng_facility_volume",
        "marine_terminal_facility_year_end_volume": (
            "marine_terminal_facility_volume"
        ),
        "lng_facility_year_end_capacity": "lng_facility_capacity",
        "marine_terminal_facility_year_end_capacity": (
            "marine_terminal_facility_capacity"
        ),
    }
    keep = list(renames)

    df = _core_eia176__yearly_company_data.filter(primary_key + other + keep)

    # Ensure uniqueness of the primary key. This is raised explicitly instead
    # of using a bare ``assert`` so the check still runs under ``python -O``
    # and reports how many rows are affected.
    duplicated = df.duplicated(primary_key, keep=False)
    if duplicated.any():
        raise AssertionError(
            f"Found {int(duplicated.sum())} rows sharing the same "
            f"{primary_key} values in the EIA-176 company data."
        )

    # The round trip through the index leaves the key columns first and gives
    # the frame a fresh default index before the all-missing rows are dropped.
    df = df.set_index(primary_key).reset_index().dropna(subset=keep, how="all")

    # Normalization happens in place; states it cannot match become NA and
    # those rows are dropped.
    _normalize_operating_states(df, core_pudl__codes_subdivisions)
    df = df.dropna(subset=["operating_state"])

    return df.rename(columns=renames)


def _normalize_operating_states(
    df: pd.DataFrame, core_pudl__codes_subdivisions: pd.DataFrame
) -> None:
    """Replace ``operating_state`` names with subdivision codes, in place.

    Names are compared after stripping surrounding whitespace and case-folding.
    Values with no counterpart in ``core_pudl__codes_subdivisions`` become NA.

    Args:
        df: DataFrame whose ``operating_state`` column is overwritten.
        core_pudl__codes_subdivisions: Mapping from ``subdivision_name`` to
            ``subdivision_code`` used to normalize ``operating_state``.
    """

    def _as_key(names: pd.Series) -> pd.Series:
        # Shared normalization applied to both sides of the lookup.
        return names.str.strip().str.casefold()

    keyed = core_pudl__codes_subdivisions.assign(
        key=_as_key(core_pudl__codes_subdivisions["subdivision_name"])
    )
    # When several names collapse to the same key, the first occurrence wins.
    lookup = keyed.drop_duplicates("key").set_index("key")["subdivision_code"]

    df["operating_state"] = _as_key(df["operating_state"]).map(lookup.get)
Loading