def split(self, parts: int | ee.Number = 1) -> ee.List:
    """Split a feature collection in multiple parts.

    A temporary random column is added to the collection and each part
    keeps the features whose random value lies between two consecutive
    steps of an evenly spaced sequence going from 0 to 1. The parts are
    therefore not guaranteed to hold the same number of features.

    Args:
        parts: The number of parts to split the collection in

    Returns:
        A list of ee.FeatureCollection. Elements retrieved with
        ``ee.List.get`` are untyped and need to be cast back with
        ``ee.FeatureCollection(...)`` before calling collection methods.

    Examples:
        .. jupyter-execute::

            import ee, geetools
            from geetools.utils import initialize_documentation

            initialize_documentation()

            fc = ee.FeatureCollection("FAO/GAUL/2015/level0")
            parts = fc.geetools.split(2)
            ee.FeatureCollection(parts.get(1)).size().getInfo()
    """
    # if parts == 1, then we simply return the initial collection in a list (for consistency)
    # in order to avoid extra computation from GEE side. This shortcut is only
    # taken for a Python-side 1; server-side numbers go through the full path.
    if parts == 1:
        return ee.List([self._obj])

    # extract the properties of the ee.FeatureCollection object before building
    # the random column so the temporary column can be dropped afterwards
    properties = self._obj.first().propertyNames()

    # build the sequence of steps to split the collection: ``parts + 1``
    # evenly spaced bounds between 0 and 1, and the index of every lower bound
    nbParts = ee.Number(parts)
    steps = ee.List.sequence(0, 1, count=nbParts.add(1).toInt())
    stepsIndex = ee.List.sequence(0, steps.size().subtract(2))

    # attach the random temp column used to dispatch the features
    prop = ee.String("__geetools_split_prop__")
    fc = self._obj.randomColumn(prop)

    def extractPart(index):
        """Return the features whose random value lies between step ``index`` and the next one."""
        lower = ee.Number(index)
        upper = lower.add(1)
        # NOTE(review): ee.Filter.rangeContains is documented as inclusive, so a
        # feature whose random value is exactly equal to a step would be
        # returned in two consecutive parts.
        inRange = ee.Filter.rangeContains(prop, steps.get(lower), steps.get(upper))
        # select only the initial properties to drop the random temp column
        return ee.FeatureCollection(fc.filter(inRange)).select(properties)

    return ee.List(stepsIndex.map(extractPart))
class TestSplit:
    """Test the ``split`` method."""

    def test_split(self, ee_dictionary_regression):
        """Compare the first of 3 parts of a 10-feature collection with the regression data."""
        collection = ee.FeatureCollection("FAO/GAUL/2015/level0").limit(10)
        # ee.List.get returns an untyped object, cast it back to a collection
        first_part = ee.FeatureCollection(collection.geetools.split(3).get(0))
        ee_dictionary_regression.check(first_part.geetools.toDictionary())
baseAlgorithm: + functionDefinitionValue: + argumentNames: + - _MAPPING_VAR_1_0 + body: '3' + dropNulls: + constantValue: false + list: + functionInvocationValue: + arguments: + end: + functionInvocationValue: + arguments: + left: + functionInvocationValue: + arguments: + list: + valueReference: '7' + functionName: List.size + right: + constantValue: 2 + functionName: Number.subtract + start: + constantValue: 0 + functionName: List.sequence + functionName: List.map + functionName: List.get + '3': + functionInvocationValue: + arguments: + baseAlgorithm: + functionDefinitionValue: + argumentNames: + - _MAPPING_VAR_0_0 + body: '4' + collection: + functionInvocationValue: + arguments: + collection: + functionInvocationValue: + arguments: + collection: + valueReference: '5' + columnName: + valueReference: '6' + functionName: Collection.randomColumn + filter: + functionInvocationValue: + arguments: + field: + valueReference: '6' + maxValue: + functionInvocationValue: + arguments: + index: + functionInvocationValue: + arguments: + left: + argumentReference: _MAPPING_VAR_1_0 + right: + constantValue: 1 + functionName: Number.add + list: + valueReference: '7' + functionName: List.get + minValue: + functionInvocationValue: + arguments: + index: + argumentReference: _MAPPING_VAR_1_0 + list: + valueReference: '7' + functionName: List.get + functionName: Filter.rangeContains + functionName: Collection.filter + functionName: Collection.map + '4': + functionInvocationValue: + arguments: + input: + argumentReference: _MAPPING_VAR_0_0 + propertySelectors: + functionInvocationValue: + arguments: + element: + functionInvocationValue: + arguments: + collection: + valueReference: '5' + functionName: Collection.first + functionName: Element.propertyNames + retainGeometry: + constantValue: true + functionName: Feature.select + '5': + functionInvocationValue: + arguments: + collection: + functionInvocationValue: + arguments: + tableId: + constantValue: FAO/GAUL/2015/level0 + functionName: 
Collection.loadTable + limit: + constantValue: 10 + functionName: Collection.limit + '6': + constantValue: __geetools_split_prop__ + '7': + functionInvocationValue: + arguments: + count: + functionInvocationValue: + arguments: + left: + functionInvocationValue: + arguments: + input: + constantValue: 3 + functionName: Number.toInt + right: + constantValue: 1 + functionName: Number.add + end: + constantValue: 1 + start: + constantValue: 0 + functionName: List.sequence + '8': + functionInvocationValue: + arguments: + element: + argumentReference: _MAPPING_VAR_2_0 + properties: + functionInvocationValue: + arguments: + element: + functionInvocationValue: + arguments: + collection: + valueReference: '2' + functionName: Collection.first + functionName: Element.propertyNames + functionName: Element.toDictionary diff --git a/tests/test_FeatureCollection/test_split.yml b/tests/test_FeatureCollection/test_split.yml new file mode 100644 index 00000000..3212da86 --- /dev/null +++ b/tests/test_FeatureCollection/test_split.yml @@ -0,0 +1,40 @@ +0000000000000000000c: + ADM0_CODE: 2648 + ADM0_NAME: Serbia + DISP_AREA: 'NO' + EXP0_YEAR: 3000 + STATUS: Member State + STR0_YEAR: 2006 + Shape_Area: 9.934731 + Shape_Leng: 24.909326 + system:index: 0000000000000000000c +0000000000000000002e: + ADM0_CODE: 74 + ADM0_NAME: South Sudan + DISP_AREA: 'NO' + EXP0_YEAR: 3000 + STATUS: Member State + STR0_YEAR: 2011 + Shape_Area: 51.599166 + Shape_Leng: 46.905431 + system:index: 0000000000000000002e +0000000000000000004c: + ADM0_CODE: 6 + ADM0_NAME: Sudan + DISP_AREA: 'NO' + EXP0_YEAR: 3000 + STATUS: Member State + STR0_YEAR: 2011 + Shape_Area: 155.888802 + Shape_Leng: 81.910242 + system:index: 0000000000000000004c +000000000000000000b5: + ADM0_CODE: 82 + ADM0_NAME: Faroe Islands + DISP_AREA: 'NO' + EXP0_YEAR: 3000 + STATUS: DK Territory + STR0_YEAR: 1000 + Shape_Area: 0.240299 + Shape_Leng: 15.952426 + system:index: 000000000000000000b5