Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ftw_tools/inference/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@
import geopandas as gpd
import numpy as np
import shapely
from fiboa_cli.parquet import create_parquet

from ftw_tools.parquet_utils import create_parquet


def merge_polygons(
Expand Down
78 changes: 78 additions & 0 deletions ftw_tools/parquet_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""Utility functions for writing GeoParquet files with fiboa metadata.

This module provides lightweight replacements for fiboa-cli's create_parquet
and features_to_dataframe functions to avoid the flatdict dependency.
"""

import json

import geopandas as gpd
import pyarrow.parquet as pq
import shapely.geometry


def features_to_dataframe(features, columns):
    """Convert GeoJSON-like features to a GeoDataFrame.

    Args:
        features: List of GeoJSON-like feature dictionaries with geometry and properties.
        columns: List of column names to include in the GeoDataFrame.

    Returns:
        GeoDataFrame with the features converted to rows, using EPSG:4326 CRS.
        Features whose geometry is missing or explicitly ``null`` produce rows
        with a ``None`` geometry rather than raising.

    Note:
        All input features are assumed to be in WGS84 (EPSG:4326) coordinates.
    """
    rows = []
    for feature in features:
        # RFC 7946 allows "geometry": null; guard the value, not just the key,
        # so an explicit null does not reach shapely.geometry.shape() and crash.
        geom = feature.get("geometry")
        row = {
            "id": feature.get("id"),
            "geometry": shapely.geometry.shape(geom) if geom is not None else None,
        }
        # Flatten feature properties into top-level columns; the `columns`
        # argument below controls which of them survive and in what order.
        row.update(feature.get("properties", {}))
        rows.append(row)

    return gpd.GeoDataFrame(rows, columns=columns, geometry="geometry", crs="EPSG:4326")


def create_parquet(data, columns, collection, output_file, config=None, compression=None):
    """Write a GeoDataFrame to a Parquet file with fiboa metadata.

    Args:
        data: GeoDataFrame to write.
        columns: List of column names to include in the output.
        collection: Dictionary with fiboa collection metadata.
        output_file: Path to the output Parquet file.
        config: Configuration dictionary (optional, retained for API compatibility).
        compression: Compression algorithm to use (default: 'zstd').

    Note:
        The config parameter is retained for API compatibility with the original
        fiboa-cli interface but is not used in this implementation.
    """
    codec = "zstd" if compression is None else compression

    # First pass: let geopandas serialize the geometry column correctly
    # (it emits the GeoParquet "geo" schema metadata for us).
    subset = data[columns]
    subset.to_parquet(
        output_file,
        compression=codec,
        index=False,
    )

    # Second pass: geopandas exposes no hook for custom key/value metadata,
    # so re-open the file, attach the fiboa collection under the "fiboa" key,
    # and write it back out. Existing metadata (including "geo") is preserved.
    table = pq.read_table(output_file)
    metadata = dict(table.schema.metadata or {})
    metadata[b"fiboa"] = json.dumps(collection).encode("utf-8")
    pq.write_table(table.replace_schema_metadata(metadata), output_file, compression=codec)
2 changes: 1 addition & 1 deletion ftw_tools/postprocess/polygonize.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
import rasterio.features
import shapely.geometry
from affine import Affine
from fiboa_cli.parquet import create_parquet, features_to_dataframe
from pyproj import CRS, Transformer
from rtree import index
from shapely.ops import transform, unary_union
from skimage.morphology import dilation, erosion
from tqdm import tqdm

from ftw_tools.parquet_utils import create_parquet, features_to_dataframe
from ftw_tools.settings import SUPPORTED_POLY_FORMATS_TXT


Expand Down
1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ classifiers = [
dependencies = [
"click>=8.2.1,<9",
"dask[distributed]>=2025.5.1",
"fiboa-cli==0.7",
"fiona>=1.9,<2",
"geopandas>=0.14,<2",
"kornia>=0.7,<1",
Expand Down
Loading