diff --git a/demo/extras/filter_tests.ipynb b/demo/extras/filter_tests.ipynb new file mode 100644 index 00000000..a1780564 --- /dev/null +++ b/demo/extras/filter_tests.ipynb @@ -0,0 +1,1733 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# test suite for filter option\n", + "import copy\n", + "import json\n", + "import geopandas as gpd\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pystac\n", + "import warnings\n", + "import xarray as xr\n", + "\n", + "from datetime import datetime\n", + "from pystac_client import Client\n", + "from shapely.geometry import box\n", + "\n", + "import semantique as sq\n", + "from semantique.processor.core import QueryProcessor, FilterProcessor" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "class TestSuite:\n", + " def __init__(self, ttype=\"generated\"):\n", + " \"\"\"\n", + " Creates a suite of test cases for the FilterProcessor class.\n", + "\n", + " Args:\n", + " ttype (str): Type of the test suite. 
Options are \"generated\" and \"real\".\n", + " If \"generated\", the timestamps will be generated regularly spaced for given interval.\n", + " If \"real\", the timestamps will be retrieved by quering an actual catalog of satellite data.\n", + " \"\"\"\n", + " self.ttype = ttype\n", + " # create an empty list to store test cases\n", + " self.tests = []\n", + " # define the maximum number of items in the result\n", + " self.max_items = None\n", + " # define general mapping\n", + " with open(\"../files/mapping.json\", \"r\") as file:\n", + " mapping = sq.mapping.Semantique(json.load(file))\n", + " mapping[\"entity\"] = {}\n", + " mapping[\"entity\"][\"water\"] = {\"color\": sq.appearance(\"colortype\").evaluate(\"in\", [21, 22, 23, 24])}\n", + " mapping[\"entity\"][\"vegetation\"] = {\"color\": sq.appearance(\"colortype\").evaluate(\"in\", [1, 2, 3, 4, 5, 6])}\n", + " mapping[\"entity\"][\"builtup\"] = {\"color\": sq.appearance(\"colortype\").evaluate(\"in\", [13, 14, 15, 16, 17])}\n", + " mapping[\"entity\"][\"cloud\"] = {\"color\": sq.atmosphere(\"colortype\").evaluate(\"equal\", 25)}\n", + " mapping[\"entity\"][\"snow\"] = {\"color\": sq.appearance(\"colortype\").evaluate(\"in\", [29, 30])}\n", + " mapping[\"entity\"][\"blue_band\"] = {\"color\": sq.reflectance(\"s2_band02\")}\n", + " mapping[\"entity\"][\"green_band\"] = {\"color\": sq.reflectance(\"s2_band03\")}\n", + " mapping[\"entity\"][\"red_band\"] = {\"color\": sq.reflectance(\"s2_band04\")}\n", + " mapping[\"entity\"][\"NDVI\"] = {\"color\": sq.reflectance(\"s2_band08\").\\\n", + " evaluate(\"normalized_difference\", sq.reflectance(\"s2_band04\"))}\n", + " mapping[\"entity\"][\"forest\"] = {\n", + " \"appearance\": (\n", + " sq.reflectance(\"s2_band08\")\n", + " .evaluate(\"normalized_difference\", sq.reflectance(\"s2_band04\"))\n", + " .filter_time(\"year\", \"equal\", 2020) \n", + " .reduce(\"mean\", \"time\")\n", + " .evaluate(\"greater\", 0.4)\n", + " )\n", + " }\n", + " 
mapping[\"entity\"][\"valid_obs\"] = {\"color\": sq.appearance(\"colortype\").evaluate(\"in\", [4,5,6])}\n", + " mapping[\"entity\"][\"all\"] = {\"color\": sq.appearance(\"colortype\").evaluate(\"not_equal\", 0)}\n", + "\n", + " # define general datacube layout\n", + " with open(\"../files/layout_gtiff.json\", \"r\") as file:\n", + " dc = sq.datacube.GeotiffArchive(json.load(file), src = \"../files/layers_gtiff.zip\")\n", + " # set basic spatio-temporal extent\n", + " space = sq.SpatialExtent(gpd.read_file(\"../files/footprint.geojson\"))\n", + " time = sq.TemporalExtent(\"2019-01-01\", \"2020-12-31\")\n", + " # compile context config \n", + " self.context = {\n", + " \"datacube\": dc, \n", + " \"mapping\": mapping,\n", + " \"space\": space,\n", + " \"time\": time,\n", + " \"crs\": 3035, \n", + " \"tz\": \"UTC\", \n", + " \"spatial_resolution\": [-10, 10],\n", + " \"track_types\": False,\n", + " \"meta_timestamps\": self._get_timestamps()\n", + " }\n", + "\n", + " def _get_timestamps(self):\n", + " if self.ttype == \"generated\":\n", + " meta_timestamps = pd.date_range(\n", + " start = '1960-01-01',\n", + " end = datetime.now(),\n", + " freq = 'h'\n", + " )\n", + " meta_timestamps = pd.to_datetime(meta_timestamps).sort_values()\n", + " return meta_timestamps\n", + " elif self.ttype == \"real\":\n", + " # define temporal & spatial range to perform STAC query\n", + " xmin, ymin, xmax, ymax = -2.75, 47.25, -2.25, 47.75\n", + " aoi = box(xmin, ymin, xmax, ymax)\n", + " t_range = [\"2018-07-15\", \"2020-12-01\"]\n", + " # STAC-based metadata retrieval\n", + " catalog = Client.open(\"https://earth-search.aws.element84.com/v1\")\n", + " query = catalog.search(\n", + " collections=\"sentinel-2-l2a\", \n", + " datetime=t_range, \n", + " limit=100, \n", + " intersects=aoi\n", + " )\n", + " stac_json = query.item_collection_as_dict()\n", + " gdf = gpd.GeoDataFrame.from_features(stac_json, \"epsg:4326\")\n", + " meta_timestamps = 
pd.to_datetime(gdf.datetime.drop_duplicates())\n", + " return meta_timestamps\n", + " else:\n", + " raise ValueError(\"Invalid value for timestamps. Options are 'generated' and 'real'.\")\n", + "\n", + " def populate(self):\n", + " # define base entities\n", + " recipe = sq.QueryRecipe()\n", + " red_band = sq.reflectance(\"s2_band04\")\n", + " green_band = sq.reflectance(\"s2_band03\")\n", + " blue_band = sq.reflectance(\"s2_band02\")\n", + "\n", + " # define test no. 1\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(sq.entity(\"cloud\").evaluate(\"not\"))\n", + " result = {\n", + " 'atmosphere_colortype': self.max_items,\n", + " 'reflectance_s2_band02': self.max_items,\n", + " 'reflectance_s2_band03': self.max_items,\n", + " 'reflectance_s2_band04': self.max_items\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.1\",\n", + " \"desc\": \"No temporal filter\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
2a \n", + " # test recipe\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(\n", + " blue_band.filter(sq.self().extract(\"time\").evaluate(\"during\", sq.time_interval(\"2021-01-01\", \"2021-12-31\"))),\n", + " green_band.filter_time(\"year\", \"greater\", 2020).filter_time(\"year\", \"less\", 2022), \n", + " red_band.filter(sq.self().extract(\"time\", \"year\").evaluate(\"less\", 2015)) \n", + " ).\\\n", + " concatenate(\"band\")\n", + " # expected number of items in result\n", + " result = {\n", + " 'reflectance_s2_band02': 8737,\n", + " 'reflectance_s2_band03': 8760,\n", + " 'reflectance_s2_band04': 482136\n", + " }\n", + " # test & expected result + test description\n", + " self.tests.append({\n", + " \"name\": \"no.2a\",\n", + " \"desc\": \"Various temporal filters applied directly to data layers\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 2b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"blue_I\"] = sq.collection(\n", + " blue_band.filter(sq.self().extract(\"time\").evaluate(\"during\", sq.time_interval(\"2021-01-01\", \"2021-12-31\"))),\n", + " blue_band.filter(sq.self().extract(\"time\").evaluate(\"before\", sq.time_instant(\"2021-01-01\"))),\n", + " ).\\\n", + " concatenate(\"band\")\n", + " recipe[\"blue_II\"] = sq.collection(\n", + " blue_band.filter(sq.self().extract(\"time\").evaluate(\"after\", sq.time_instant(\"2021-12-31\")))\n", + " ).\\\n", + " concatenate(\"band\")\n", + " recipe[\"red_green\"] = sq.collection(\n", + " green_band.filter_time(\"year\", \"less\", 2015),\n", + " red_band.filter_time(\"year\", \"greater\", 2010).filter_time(\"year\", \"less\", 2020)\n", + " ).\\\n", + " concatenate(\"band\")\n", + " result = {\n", + " 'reflectance_s2_band02': self.max_items,\n", + " 'reflectance_s2_band03': 482136,\n", + " 'reflectance_s2_band04': 78888\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.2b\",\n", + 
" \"desc\": \"Temporal filters across different results\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 3a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(sq.entity(\"cloud\").evaluate(\"not\")).\\\n", + " evaluate(\"subtract\", blue_band.filter_time(\"before\", sq.time_instant(\"2019-12-31\")))\n", + " result = {\n", + " 'atmosphere_colortype': 525936,\n", + " 'reflectance_s2_band02': 525936,\n", + " 'reflectance_s2_band03': 525936,\n", + " 'reflectance_s2_band04': 525936\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.3a\",\n", + " \"desc\": \"Temporal filter applied indirectly via evaluate as part of filter object (algebraic operators)\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 3b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(sq.entity(\"cloud\").evaluate(\"not\")).\\\n", + " evaluate(\"or\", blue_band.filter_time(\"before\", sq.time_instant(\"2019-12-31\")))\n", + " result = {\n", + " 'atmosphere_colortype': self.max_items,\n", + " 'reflectance_s2_band02': 525936,\n", + " 'reflectance_s2_band03': self.max_items,\n", + " 'reflectance_s2_band04': self.max_items\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.3b\",\n", + " \"desc\": \"Temporal filter applied indirectly via evaluate as part of filter object (boolean/relational/membership operators)\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + " \n", + " # define test no. 
3c\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(sq.entity(\"cloud\").evaluate(\"not\")).\\\n", + " filter_time(\"before\", sq.time_instant(\"2019-12-31\")).\\\n", + " evaluate(\"subtract\", blue_band)\n", + " result = {\n", + " 'atmosphere_colortype': 525936,\n", + " 'reflectance_s2_band02': 525936,\n", + " 'reflectance_s2_band03': 525936,\n", + " 'reflectance_s2_band04': 525936\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.3c\",\n", + " \"desc\": \"Temporal filter applied indirectly via evaluate as part of object to be filtered (algebraic operators), output equivalent to 3a\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 3d\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(sq.entity(\"cloud\").evaluate(\"not\")).\\\n", + " filter_time(\"before\", sq.time_instant(\"2019-12-31\")).\\\n", + " evaluate(\"or\", blue_band)\n", + " result = {\n", + " 'atmosphere_colortype': 525936,\n", + " 'reflectance_s2_band02': self.max_items,\n", + " 'reflectance_s2_band03': 525936,\n", + " 'reflectance_s2_band04': 525936\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.3d\",\n", + " \"desc\": \"Temporal filter applied indirectly via evaluate as part of object to be filtered (boolean/relational/membership operators), output should be equivalent to 3b but is not due to the way NaNs are handled (https://github.com/ZGIS/semantique/issues/54)\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
3e\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(green_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(sq.entity(\"cloud\").filter_time(\"before\", sq.time_instant(\"2019-12-31\")).evaluate(\"not\")).\\\n", + " evaluate(\"subtract\", blue_band)\n", + " result = {\n", + " 'atmosphere_colortype': 525936,\n", + " 'reflectance_s2_band02': 525936,\n", + " 'reflectance_s2_band03': 525936\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.3e\",\n", + " \"desc\": \"Temporal filter applied indirectly via filter with entity\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 3f\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(sq.entity(\"cloud\").filter_time(\"before\", sq.time_instant(\"2019-12-31\")).evaluate(\"not\")).\\\n", + " evaluate(\"subtract\", blue_band)\n", + " recipe[\"composite_II\"] = sq.collection(red_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(sq.entity(\"cloud\").evaluate(\"not\")).\\\n", + " evaluate(\"or\", blue_band.filter_time(\"before\", sq.time_instant(\"2019-12-31\")))\n", + " result = {\n", + " 'atmosphere_colortype': self.max_items,\n", + " 'reflectance_s2_band02': self.max_items,\n", + " 'reflectance_s2_band03': 525936,\n", + " 'reflectance_s2_band04': self.max_items\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.3f\",\n", + " \"desc\": \"Temporal filter applied indirectly via filter with entity\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
4a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(\n", + " sq.collection(\n", + " sq.self().extract(\"time\", \"year\").evaluate(\"greater\", 2015),\n", + " sq.self().extract(\"time\", \"year\").evaluate(\"less\", 2020),\n", + " ).merge(\"all\")\n", + " ).\\\n", + " filter(sq.entity(\"cloud\").evaluate(\"not\")).\\\n", + " evaluate(\"subtract\", blue_band)\n", + " result = {\n", + " 'atmosphere_colortype': 35064,\n", + " 'reflectance_s2_band02': 35064,\n", + " 'reflectance_s2_band03': 35064,\n", + " 'reflectance_s2_band04': 35064\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.4a\",\n", + " \"desc\": \"multiple temporal filter organised in a collection\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 4b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(\n", + " sq.collection(\n", + " sq.self().extract(\"time\", \"year\").evaluate(\"greater\", 2015),\n", + " sq.self().extract(\"time\", \"year\").evaluate(\"less\", 2020),\n", + " sq.self().evaluate(\"less\", 1)\n", + " ).merge(\"all\")\n", + " ).\\\n", + " filter(sq.entity(\"cloud\").evaluate(\"not\")).\\\n", + " evaluate(\"subtract\", blue_band)\n", + " result = {\n", + " 'atmosphere_colortype': 35064,\n", + " 'reflectance_s2_band02': 35064,\n", + " 'reflectance_s2_band03': 35064,\n", + " 'reflectance_s2_band04': 35064\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.4b\",\n", + " \"desc\": \"multiple temporal filter and non-blocking non-temporal filter organised in a collection\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
4c\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(\n", + " sq.collection(\n", + " sq.self().extract(\"time\", \"year\").evaluate(\"less\", 2000),\n", + " sq.self().extract(\"time\", \"year\").evaluate(\"greater\", 2020),\n", + " sq.self().evaluate(\"less\", 1)\n", + " ).merge(\"any\")\n", + " ).\\\n", + " filter(sq.entity(\"cloud\").evaluate(\"not\")).\\\n", + " evaluate(\"subtract\", blue_band)\n", + " result = {\n", + " 'atmosphere_colortype': self.max_items,\n", + " 'reflectance_s2_band02': self.max_items,\n", + " 'reflectance_s2_band03': self.max_items,\n", + " 'reflectance_s2_band04': self.max_items\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.4c\",\n", + " \"desc\": \"multiple temporal filter and blocking non-temporal filter organised in a collection\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 5a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter_time(\"before\", sq.time_instant(\"2019-12-31\")).\\\n", + " fill(\"time\", \"nearest\")\n", + " result = {\n", + " 'reflectance_s2_band02': 525936,\n", + " 'reflectance_s2_band03': 525936,\n", + " 'reflectance_s2_band04': 525936\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.5a\",\n", + " \"desc\": \"temporal filter followed by fill operation\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
5b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " fill(\"time\", \"nearest\").\\\n", + " filter_time(\"before\", sq.time_instant(\"2019-12-31\"))\n", + " result = {\n", + " 'reflectance_s2_band02': self.max_items,\n", + " 'reflectance_s2_band03': self.max_items,\n", + " 'reflectance_s2_band04': self.max_items,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.5b\",\n", + " \"desc\": \"fill operation followed by temporal filter\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 5c\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter_time(\"before\", sq.time_instant(\"2019-12-31\")).\\\n", + " smooth(\"mean\", \"time\", 3)\n", + " result = {\n", + " 'reflectance_s2_band02': 525936,\n", + " 'reflectance_s2_band03': 525936,\n", + " 'reflectance_s2_band04': 525936\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.5c\",\n", + " \"desc\": \"temporal filter followed by smooth operation\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
5d\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " smooth(\"mean\", \"time\", 3).\\\n", + " filter_time(\"before\", sq.time_instant(\"2019-12-31\"))\n", + " result = {\n", + " 'reflectance_s2_band02': self.max_items,\n", + " 'reflectance_s2_band03': self.max_items,\n", + " 'reflectance_s2_band04': self.max_items,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.5d\",\n", + " \"desc\": \"smooth operation followed by temporal filter\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 6a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter_time(\"before\", sq.time_instant(\"2019-12-31\")).\\\n", + " assign(0, at = sq.self().evaluate(\"less\", 2))\n", + " result = {\n", + " 'reflectance_s2_band02': 525936,\n", + " 'reflectance_s2_band03': 525936,\n", + " 'reflectance_s2_band04': 525936,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.6a\",\n", + " \"desc\": \"temporal filter followed by assign operation\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
6b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter_time(\"before\", sq.time_instant(\"2019-12-31\")).\\\n", + " assign(-99)\n", + " result = {\n", + " 'reflectance_s2_band02': 525936,\n", + " 'reflectance_s2_band03': 525936,\n", + " 'reflectance_s2_band04': 525936,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.6b\",\n", + " \"desc\": \"temporal filter followed by assign operation\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 6c\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter_time(\"before\", sq.time_instant(\"2019-12-31\")).\\\n", + " assign_time(\"month\")\n", + " result = {\n", + " 'reflectance_s2_band02': 525936,\n", + " 'reflectance_s2_band03': 525936,\n", + " 'reflectance_s2_band04': 525936,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.6c\",\n", + " \"desc\": \"temporal filter followed by assign operation\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + "\n", + " # define test no. 7a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"objects\"] = sq.reflectance(\"s2_band04\").\\\n", + " delineate().\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " reduce(\"mean\", \"time\")\n", + " result = {\n", + " 'reflectance_s2_band04': 525960,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.7a\",\n", + " \"desc\": \"temporal filter before reduce\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
7b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"objects\"] = sq.reflectance(\"s2_band04\").\\\n", + " delineate().\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " reduce(\"mean\")\n", + " result = {\n", + " 'reflectance_s2_band04': 525960,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.7b\",\n", + " \"desc\": \"temporal filter before reduce\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 7c\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"objects\"] = sq.reflectance(\"s2_band04\").\\\n", + " delineate().\\\n", + " reduce(\"mean\", \"space\").\\\n", + " filter_time(\"year\", \"less\", 2020)\n", + " result = {\n", + " 'reflectance_s2_band04': 525960,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.7c\",\n", + " \"desc\": \"temporal filter after reduce-over-space\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 7d\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"vegetation_count_per_season\"] = sq.reflectance(\"s2_band04\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " groupby(sq.collection(\n", + " sq.self().\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " extract(\"time\", \"month\"), \n", + " sq.self().\\\n", + " extract(\"time\", \"month\").\\\n", + " evaluate(\"not_in\", sq.interval(6, 11))\n", + " ).compose()).\\\n", + " reduce(\"count\", \"time\")\n", + " result = {\n", + " 'reflectance_s2_band04': 525960,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.7d\",\n", + " \"desc\": \"temporal filter with reduce on collection\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + " \n", + " # define test no. 
8a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"objects\"] = sq.reflectance(\"s2_band04\").delineate().filter_time(\"year\", \"less\", 2020)\n", + " recipe[\"map\"] = sq.collection(blue_band, green_band, red_band).\\\n", + " concatenate(\"band\").\\\n", + " groupby(sq.result(\"objects\"))\n", + " result = {\n", + " 'reflectance_s2_band02': 525960,\n", + " 'reflectance_s2_band03': 525960,\n", + " 'reflectance_s2_band04': 525960,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.8a\",\n", + " \"desc\": \"temporal filter indirectly via groupby\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 8b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"objects\"] = sq.reflectance(\"s2_band04\").\\\n", + " delineate().\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " reduce(\"mean\", \"time\")\n", + " recipe[\"map\"] = sq.collection(blue_band, green_band, red_band).\\\n", + " concatenate(\"band\").\\\n", + " groupby(sq.result(\"objects\"))\n", + " result = {\n", + " 'reflectance_s2_band02': self.max_items,\n", + " 'reflectance_s2_band03': self.max_items,\n", + " 'reflectance_s2_band04': self.max_items,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.8b\",\n", + " \"desc\": \"temporal filter via groupby inactive due to reduce\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # # note: the same tests as 8a/8b could be written for filter_time\n", + " # # however due to issue #55, the test will currently result in unexpected results\n", + " # # src:\n", + " # recipe[\"res1\"] = sq.reflectance(\"s2_band03\").\\\n", + " # delineate().\\\n", + " # filter_time(\"year\", \"less\", 2020)\n", + " # # reduce(\"mean\", \"time\")\n", + " # recipe[\"res2\"] = sq.reflectance(\"s2_band03\").\\\n", + " # filter(sq.result(\"res1\"))\n", + "\n", + " # define test no. 
9a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"res1\"] = sq.reflectance(\"s2_band03\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " reduce(\"mean\", \"time\")\n", + " recipe[\"res2\"] = sq.reflectance(\"s2_band03\").\\\n", + " filter_time(\"year\", \"greater_equal\", 2020)\n", + " recipe[\"result\"] = sq.collection(sq.result(\"res1\"), sq.result(\"res2\")).\\\n", + " concatenate(\"band\")\n", + " result = {\n", + " 'reflectance_s2_band03': self.max_items,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.9a\",\n", + " \"desc\": \"temporally filtered results in concatenation\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 9b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"res1\"] = sq.reflectance(\"s2_band03\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " reduce(\"mean\", \"time\")\n", + " recipe[\"res2\"] = sq.reflectance(\"s2_band04\").\\\n", + " filter_time(\"year\", \"greater_equal\", 2020).\\\n", + " filter_time(\"year\", \"less_equal\", 2020)\n", + " recipe[\"result\"] = sq.collection(sq.result(\"res1\"), sq.result(\"res2\")).\\\n", + " concatenate(\"band\")\n", + " result = {\n", + " 'reflectance_s2_band03': 525960,\n", + " 'reflectance_s2_band04': 8784\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.9b\",\n", + " \"desc\": \"temporally filtered results in concatenation\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + " \n", + " # define test no. 
10a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " groupby_space(\"feature\").\\\n", + " reduce(\"count\", \"space\")\n", + " result = {\n", + " 'reflectance_s2_band02': 525960,\n", + " 'reflectance_s2_band03': 525960,\n", + " 'reflectance_s2_band04': 525960,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.10a\",\n", + " \"desc\": \"temporally filtered results with spatial ops\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 10b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(red_band, green_band, blue_band).\\\n", + " concatenate(\"band\").\\\n", + " filter_space(\"feature\", \"equal\", 0).\\\n", + " filter_time(\"year\", \"less\", 2020)\n", + " result = {\n", + " 'reflectance_s2_band02': 525960,\n", + " 'reflectance_s2_band03': 525960,\n", + " 'reflectance_s2_band04': 525960,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.10b\",\n", + " \"desc\": \"temporally filtered results with spatial ops\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
11\n", + " def make_true(obj, track_types = True, **kwargs):\n", + " newobj = obj.copy(deep = True)\n", + " newobj.values = np.ones_like(newobj)\n", + " if track_types:\n", + " newobj.sq.value_type = \"binary\"\n", + " del obj.sq.value_labels\n", + " return newobj\n", + " def modulus(x, y, track_types = True, **kwargs):\n", + " if track_types:\n", + " manual = {\"continuous\": {\"continuous\": \"continuous\"}, \"__preserve_labels\": 0}\n", + " promoter = TypePromoter(x, y, manual = manual)\n", + " promoter.check()\n", + " f = lambda x, y: np.mod(x, y)\n", + " y = xr.DataArray(y).sq.align_with(x)\n", + " out = xr.apply_ufunc(f, x, y)\n", + " if track_types:\n", + " out = promoter.promote(out)\n", + " return out\n", + " def sum_of_squares(x, track_types = False, **kwargs):\n", + " if track_types:\n", + " promoter = TypePromoter(x, function = \"sum\")\n", + " promoter.check()\n", + " f = lambda x, axis = None: np.sum(np.square(x), axis)\n", + " out = x.reduce(f, **kwargs)\n", + " if track_types:\n", + " promoter.promote(out)\n", + " return out\n", + " new_context = copy.deepcopy(self.context)\n", + " new_context[\"custom_verbs\"] = {\"make_true\": make_true}\n", + " new_context[\"custom_operators\"] = {\"modulus\": modulus}\n", + " new_context[\"custom_reducers\"] = {\"sum_of_squares\": sum_of_squares}\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"vals\"] = sq.reflectance(\"s2_band04\").filter_time(\"year\", \"less\", 2020)\n", + " recipe[\"ones\"] = sq.result(\"vals\").apply_custom(\"make_true\")\n", + " recipe[\"mod\"] = sq.result(\"vals\").evaluate(\"modulus\", 2)\n", + " recipe[\"foo\"] = sq.result(\"vals\").reduce(\"sum_of_squares\", \"space\")\n", + " result = {\n", + " 'reflectance_s2_band04': 525960,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.11\",\n", + " \"desc\": \"temporal filter with custom verbs/ops/reducers\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": new_context\n", + " })\n", + "\n", + " # 
define test no. 12a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"ndvi_I\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter(sq.self().evaluate(\"is_missing\"))\n", + " result = {\n", + " 'reflectance_s2_band04': 0, \n", + " 'reflectance_s2_band08': 0\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.12a\",\n", + " \"desc\": \"filter via inverse\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + " \n", + " # define test no. 12b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = sq.collection(green_band).\\\n", + " concatenate(\"band\").\\\n", + " filter(sq.entity(\"cloud\").filter_time(\"after\", sq.time_instant(\"2019-12-31\")).evaluate(\"is_missing\"))\n", + " result = {\n", + " 'atmosphere_colortype': 525937,\n", + " 'reflectance_s2_band03': 525937\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.12b\",\n", + " \"desc\": \"filter via inverse\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 12c\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"res1\"] = sq.reflectance(\"s2_band03\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter(sq.reflectance(\"s2_band03\").evaluate(\"not_equal\", sq.self()))\n", + " result = {\n", + " 'reflectance_s2_band03': 8784\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.12c\",\n", + " \"desc\": \"filter via self in general - isn't considered since it's a content-based operation. Specific case here where everything gets filtered but this should be considered a content-based operation\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
13a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = blue_band.filter(\n", + " sq.collection(\n", + " sq.self().extract(\"time\").evaluate(\"before\", sq.time_instant(\"2020-01-01\")),\n", + " sq.self().extract(\"time\").evaluate(\"during\", sq.time_interval(\"2020-01-01\", \"2020-10-31\")),\n", + " ).merge(\"any\")\n", + " )\n", + " result = {\n", + " 'reflectance_s2_band02': 533257\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.13a\",\n", + " \"desc\": \"collection of temporal filters\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 13b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"composite\"] = blue_band.filter(\n", + " sq.collection(\n", + " sq.self().extract(\"time\").evaluate(\"during\", sq.time_interval(\"2020-10-31\", \"2021-01-01\")),\n", + " sq.self().extract(\"time\").evaluate(\"after\", sq.time_instant(\"2021-01-01\")),\n", + " ).merge(\"any\").evaluate(\"not\")\n", + " )\n", + " result = {\n", + " 'reflectance_s2_band02': 533256\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.13b\",\n", + " \"desc\": \"collection of temporal filters\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
13c\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"vegetation_count_per_season\"] = sq.reflectance(\"s2_band02\").\\\n", + " filter_time(\"year\", \"less\", 2024).\\\n", + " filter(sq.collection(\n", + " sq.self().extract(\"time\", \"month\").evaluate(\"in\", sq.interval(5, 6)),\n", + " sq.self().extract(\"time\", \"month\").evaluate(\"in\", sq.interval(6, 11))\n", + " ).compose())\n", + " result = {\n", + " 'reflectance_s2_band02': 328704\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.13c\",\n", + " \"desc\": \"collection of temporal filters\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 13d\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"vegetation_count_per_season\"] = sq.reflectance(\"s2_band02\").\\\n", + " filter_time(\"year\", \"less\", 2024).\\\n", + " filter(sq.collection(\n", + " sq.self().extract(\"time\", \"month\").evaluate(\"in\", sq.interval(5, 6)),\n", + " sq.self().extract(\"time\", \"month\").evaluate(\"in\", sq.interval(6, 11))\n", + " ).compose().evaluate(\"is_missing\"))\n", + " result = {\n", + " 'reflectance_s2_band02': 232320\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.13d\",\n", + " \"desc\": \"collection of temporal filters\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
13e\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"vegetation_count_per_season\"] = sq.reflectance(\"s2_band02\").\\\n", + " filter(sq.collection(\n", + " sq.self().extract(\"time\", \"month\").evaluate(\"in\", sq.interval(5, 6)),\n", + " sq.self().extract(\"time\", \"month\").evaluate(\"in\", sq.interval(6, 11))\n", + " ).compose().evaluate(\"not\"))\n", + " result = {\n", + " 'reflectance_s2_band02': 0\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.13e\",\n", + " \"desc\": \"collection of temporal filters - fake due to not\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 13f\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"ndvi_I\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 3)\n", + " recipe[\"ndvi_II\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 4)\n", + " recipe[\"bothndvi\"] = sq.collection(sq.result(\"ndvi_I\"), sq.result(\"ndvi_II\")).\\\n", + " merge(\"all\").\\\n", + " groupby_space(\"feature\").\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " concatenate(\"season\").\\\n", + " filter(sq.self().extract(\"time\").evaluate(\"during\", sq.time_interval(\"2020-01-01\", \"2020-02-01\")))\n", + " result = {\n", + " 'reflectance_s2_band04': 4368, \n", + " 'reflectance_s2_band08': 4368\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.13f\",\n", + " \"desc\": \"base recipe for next steps\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
14a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"ndvi_I\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 3)\n", + " recipe[\"ndvi_II\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 4)\n", + " recipe[\"bothndvi\"] = sq.collection(sq.result(\"ndvi_I\"), sq.result(\"ndvi_II\")).\\\n", + " merge(\"all\").\\\n", + " groupby_space(\"feature\").\\\n", + " extract(\"time\")\n", + " result = {\n", + " 'reflectance_s2_band04': self.max_items,\n", + " 'reflectance_s2_band08': self.max_items\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.14a\",\n", + " \"desc\": \"collection output with extracted time\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 14b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"ndvi_I\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 3) \n", + " recipe[\"ndvi_II\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 4)\n", + " recipe[\"bothndvi\"] = sq.collection(sq.result(\"ndvi_I\"), sq.result(\"ndvi_II\")).\\\n", + " merge(\"all\").\\\n", + " groupby_space(\"feature\").\\\n", + " shift(sq.dimensions.TIME, 1).\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " extract(\"time\").\\\n", + " evaluate(\"during\", sq.time_interval(\"2020-01-01\", \"2020-01-07\")) \n", + " result = {\n", + " 'reflectance_s2_band04': self.max_items,\n", + " 'reflectance_s2_band08': self.max_items\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.14b\",\n", + " \"desc\": \"collection output with extracted & evaluated time\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + "\n", + " # define test no. 
14c\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"ndvi_I\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 3)\n", + " recipe[\"ndvi_II\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 4)\n", + " recipe[\"bothndvi\"] = sq.collection(sq.result(\"ndvi_I\"), sq.result(\"ndvi_II\")).\\\n", + " merge(\"all\").\\\n", + " groupby_space(\"feature\").\\\n", + " shift(sq.dimensions.TIME, 1).\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " concatenate(\"season\").\\\n", + " extract(\"time\") \n", + " result = {\n", + " 'reflectance_s2_band04': self.max_items,\n", + " 'reflectance_s2_band08': self.max_items\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.14c\",\n", + " \"desc\": \"array output with extracted time\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
14d\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"ndvi_I\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 3).\\\n", + " trim(\"time\") \n", + " recipe[\"ndvi_II\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 4).\\\n", + " trim(\"time\")\n", + " recipe[\"bothndvi\"] = sq.collection(sq.result(\"ndvi_I\"), sq.result(\"ndvi_II\")).\\\n", + " merge(\"all\").\\\n", + " groupby_space(\"feature\").\\\n", + " shift(sq.dimensions.TIME, 1).\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " extract(\"time\").\\\n", + " evaluate(\"during\", sq.time_interval(\"2020-01-01\", \"2020-01-07\")) \n", + " result = {\n", + " 'reflectance_s2_band04': 4368,\n", + " 'reflectance_s2_band08': 4368\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.14d\",\n", + " \"desc\": \"array output with extracted time - same as 100d1 due to trim\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + "\n", + " # define test no. 
14e\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"ndvi_I\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " trim().\\\n", + " filter_time(\"season\", \"equal\", 3)\n", + " recipe[\"ndvi_II\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " trim().\\\n", + " filter_time(\"season\", \"equal\", 4)\n", + " recipe[\"bothndvi\"] = sq.collection(sq.result(\"ndvi_I\"), sq.result(\"ndvi_II\")).\\\n", + " merge(\"all\").\\\n", + " groupby_space(\"feature\").\\\n", + " shift(sq.dimensions.TIME, 1).\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " concatenate(\"season\").\\\n", + " extract(\"time\") \n", + " result = {\n", + " 'reflectance_s2_band04': 8784,\n", + " 'reflectance_s2_band08': 8784\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.14e\",\n", + " \"desc\": \"array output with extracted time - interim trim\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 14f\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"ndvi_I\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " trim().\\\n", + " filter_time(\"season\", \"equal\", 3)\n", + " recipe[\"ndvi_II\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " trim().\\\n", + " filter_time(\"season\", \"equal\", 4)\n", + " recipe[\"bothndvi\"] = sq.collection(sq.result(\"ndvi_I\"), sq.result(\"ndvi_II\")).\\\n", + " merge(\"all\").\\\n", + " extract(\"space\")\n", + " result = {\n", + " 'reflectance_s2_band04': 4368,\n", + " 'reflectance_s2_band08': 4368\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.14f\",\n", + " \"desc\": \"array output with extracted space (-> no temporal output)\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
14g\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"ndvi_I\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " trim().\\\n", + " filter_time(\"season\", \"equal\", 3)\n", + " recipe[\"ndvi_II\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " trim().\\\n", + " filter_time(\"season\", \"equal\", 4)\n", + " recipe[\"bothndvi\"] = sq.collection(sq.result(\"ndvi_I\"), sq.result(\"ndvi_II\")).\\\n", + " merge(\"all\").\\\n", + " groupby_space(\"feature\").\\\n", + " extract(\"space\")\n", + " result = {\n", + " 'reflectance_s2_band04': 4368,\n", + " 'reflectance_s2_band08': 4368\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.14g\",\n", + " \"desc\": \"Collection output with extracted space (-> no temporal output)\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 14h\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"res\"] = sq.reflectance(\"s2_band04\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " extract(\"space\")\n", + " result = {\n", + " 'reflectance_s2_band04': self.max_items\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.14h\",\n", + " \"desc\": \"Single output with extracted space, is ignored\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + "\n", + " # define test no. 
19a\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"res1\"] = sq.reflectance(\"s2_band04\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " evaluate(\"subtract\", sq.reflectance(\"s2_band03\").reduce(\"mean\", \"time\"))\n", + " recipe[\"res2\"] = sq.reflectance(\"s2_band08\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " shift(\"time\", 1)\n", + " recipe[\"res3\"] = sq.reflectance(\"s2_band02\").\\\n", + " evaluate(\"less\", sq.result(\"res1\"))\n", + " recipe[\"result\"] = sq.collection(sq.result(\"res1\"), sq.result(\"res2\"), sq.result(\"res3\")).\\\n", + " concatenate(\"band\").\\\n", + " filter_time(\"year\", \"less\", 2019).\\\n", + " reduce(\"mean\", \"band\")\n", + " result = {\n", + " 'reflectance_s2_band02': self.max_items,\n", + " 'reflectance_s2_band03': self.max_items,\n", + " 'reflectance_s2_band04': 525960,\n", + " 'reflectance_s2_band08': 525960\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19a\",\n", + " \"desc\": \"complex series of filter & concatenate operations\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " \n", + " # define test no. 
19b\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"result\"] = sq.collection(\n", + " (sq.reflectance(\"s2_band04\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " evaluate(\"subtract\", sq.reflectance(\"s2_band03\").reduce(\"mean\", \"time\"))),\n", + " (sq.reflectance(\"s2_band08\").\\\n", + " filter_time(\"year\", \"greater\", 2020)).\\\n", + " filter_time(\"year\", \"less\", 2023).\\\n", + " shift(\"time\", 1)).\\\n", + " concatenate(\"band\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " reduce(\"mean\", \"band\")\n", + " result = {\n", + " 'reflectance_s2_band03': self.max_items,\n", + " 'reflectance_s2_band04': 525960,\n", + " 'reflectance_s2_band08': 17520\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19b\",\n", + " \"desc\": \"complex series of filter & concatenate operations\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 19c\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"result\"] = sq.collection(\n", + " (sq.reflectance(\"s2_band04\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " evaluate(\"subtract\", sq.reflectance(\"s2_band03\").reduce(\"mean\", \"time\"))),\n", + " (sq.reflectance(\"s2_band08\").\\\n", + " filter_time(\"year\", \"greater\", 2020))).\\\n", + " concatenate(\"band\").\\\n", + " filter_time(\"year\", \"less\", 2019).\\\n", + " reduce(\"mean\", \"band\")\n", + " result = {\n", + " 'reflectance_s2_band03': self.max_items,\n", + " 'reflectance_s2_band04': 517200,\n", + " 'reflectance_s2_band08': 0\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19c\",\n", + " \"desc\": \"complex series of filter & concatenate operations\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
19d\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"res1\"] = sq.reflectance(\"s2_band04\").\\\n", + " evaluate(\"subtract\", \n", + " sq.reflectance(\"s2_band03\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " reduce(\"mean\", \"time\")\n", + " )\n", + " recipe[\"res2\"] = sq.reflectance(\"s2_band03\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " reduce(\"mean\", \"time\").\\\n", + " evaluate(\"subtract\", sq.reflectance(\"s2_band04\").reduce(\"mean\", \"time\"))\n", + " result = {\n", + " 'reflectance_s2_band03': 525960, \n", + " 'reflectance_s2_band04': self.max_items,\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19d\",\n", + " \"desc\": \"complex series of filter & evaluate operations\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 19e\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"res\"] = sq.collection(\n", + " sq.collection(\n", + " blue_band.filter(sq.self().extract(\"time\").evaluate(\"during\", sq.time_interval(\"2021-01-01\", \"2021-12-31\"))),\n", + " green_band \n", + " ).\\\n", + " compose().\\\n", + " filter_time(\"year\", \"greater\", 2010),\n", + " red_band \n", + " ).\\\n", + " merge(\"any\").\\\n", + " filter(sq.self().extract(\"time\", \"year\").evaluate(\"less\", 2015))\n", + " result = {\n", + " 'reflectance_s2_band02': 0,\n", + " 'reflectance_s2_band03': 35064,\n", + " 'reflectance_s2_band04': 482136\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19e\",\n", + " \"desc\": \"complex series of filter & compose/merge operations\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
19f\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"vegetation_I\"] = sq.entity(\"vegetation\").\\\n", + " filter_time(\"year\", \"equal\", 2019).\\\n", + " groupby_time(\"month\").\\\n", + " reduce(\"first\", \"time\").\\\n", + " concatenate(\"month\") \n", + " recipe[\"vegetation_II\"] = sq.entity(\"vegetation\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " groupby_time(\"month\").\\\n", + " reduce(\"last\", \"time\").\\\n", + " concatenate(\"month\") \n", + " recipe[\"result\"] = sq.collection(\n", + " sq.result(\"vegetation_I\"),\n", + " sq.result(\"vegetation_II\")\n", + " ).\\\n", + " merge(\"all\")\n", + " result = {\n", + " 'appearance_colortype': 17544\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19f\",\n", + " \"desc\": \"real-world recipes\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 19g\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"ndvi_I\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 3).\\\n", + " trim()\n", + " recipe[\"ndvi_II\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 4).\\\n", + " trim()\n", + " recipe[\"bothndvi\"] = sq.collection(sq.result(\"ndvi_I\"), sq.result(\"ndvi_II\")).\\\n", + " merge(\"all\").\\\n", + " groupby_space(\"feature\").\\\n", + " shift(sq.dimensions.TIME, 1).\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " concatenate(\"feature\").\\\n", + " extract(\"time\")\n", + " result = {\n", + " 'reflectance_s2_band04': 4368,\n", + " 'reflectance_s2_band08': 4368\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19g\",\n", + " \"desc\": \"real-world recipes\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
19h\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"red\"] = sq.entity(\"red_band\").\\\n", + " extract(\"time\").\\\n", + " evaluate(\"during\",sq.time_interval(\"2019-01-02\",\"2020-01-05\")).\\\n", + " reduce(\"first\", \"time\").\\\n", + " filter(sq.self())\n", + " recipe[\"bluegreen\"] = sq.collection(\n", + " sq.entity(\"blue_band\").\\\n", + " filter_time(\"month\", \"greater\", 11),\n", + " sq.entity(\"green_band\").\\\n", + " extract(\"time\", \"year\").\\\n", + " evaluate(\"equal\", 2020).\\\n", + " reduce(\"first\", \"time\")\n", + " ).concatenate(\"band\")\n", + " recipe[\"result\"] = sq.collection(sq.result(\"bluegreen\"), sq.result(\"red\")).\\\n", + " merge(\"any\")\n", + " result = {\n", + " 'reflectance_s2_band02': 47616,\n", + " 'reflectance_s2_band03': self.max_items,\n", + " 'reflectance_s2_band04': self.max_items\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19h\",\n", + " \"desc\": \"real-world recipes\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
19j\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"RGB\"] = sq.collection(\n", + " sq.entity(\"blue_band\"),\n", + " sq.entity(\"green_band\"),\n", + " sq.entity(\"red_band\")\n", + " ).concatenate(\"band\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 3).\\\n", + " reduce(\"first\", \"time\")\n", + " recipe[\"red_layer\"] = sq.entity(\"red_band\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"month\", \"greater\", 4).\\\n", + " groupby_space(\"feature\").\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " concatenate(\"month\") \n", + " recipe[\"green_layer\"] = sq.entity(\"green_band\").\\\n", + " filter_space(\"feature\", \"less\", 3223).\\\n", + " filter_time(\"year\", \"less\", 2019).\\\n", + " trim().\\\n", + " extract(\"time\", \"season\").\\\n", + " reduce(\"first\", \"time\")\n", + " recipe[\"RG\"] = sq.result(\"red_layer\").\\\n", + " evaluate(\"add\", sq.result(\"green_layer\")).\\\n", + " groupby_time(\"season\").\\\n", + " concatenate(\"feature\")\n", + " recipe[\"addall\"] = sq.collection(sq.result(\"RGB\").filter(sq.result(\"green_layer\")).\\\n", + " reduce(\"percentage\", \"space\"),\\\n", + " sq.result(\"red_layer\")).\\\n", + " merge(\"any\").\\\n", + " filter_time(\"year\", \"less\", 2020)\n", + " result = {\n", + " 'reflectance_s2_band02': 2184,\n", + " 'reflectance_s2_band03': 519384,\n", + " 'reflectance_s2_band04': 5880\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19j\",\n", + " \"desc\": \"real-world recipes\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 
19k\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"red\"] = sq.entity(\"red_band\").\\\n", + " filter_time(\"year\", \"equal\", 2019).\\\n", + " filter_time(\"month\", \"greater\", 6).\\\n", + " reduce(\"first\", \"time\")\n", + " recipe[\"green\"] = sq.entity(\"green_band\").\\\n", + " filter_time(\"year\", \"equal\", 2019).\\\n", + " reduce(\"first\", \"time\")\n", + " recipe[\"RG\"] = sq.collection(sq.result(\"red\"), sq.result(\"green\")).\\\n", + " merge(\"all\").\\\n", + " filter_space(\"feature\",\"less\", 200).\\\n", + " extract(\"space\").\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " evaluate(\"greater\", 20)\n", + " recipe[\"binarymask\"] = sq.collection(sq.result(\"red\"), sq.result(\"green\")).\\\n", + " merge(\"any\")\n", + " recipe[\"filter\"] = sq.result(\"red\").\\\n", + " filter(sq.result(\"binarymask\")).\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " evaluate(\"less\" ,20)\n", + " recipe[\"ndvi\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"less_equal\", 3)\n", + " recipe[\"ndvi_trim\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"less_equal\", 3).\\\n", + " trim()\n", + " recipe[\"ndvi_sub\"] = sq.result(\"ndvi\").evaluate(\"subtract\",(sq.result(\"ndvi_trim\"))).\\\n", + " reduce(\"percentage\", \"space\")\n", + " recipe[\"RG_trim_add\"] = sq.result(\"ndvi\").evaluate(\"add\", sq.result(\"ndvi_trim\")).\\\n", + " reduce(\"first\", \"time\")\n", + " recipe[\"RG_trim_before\"] = sq.collection(sq.result(\"RG\"), sq.result(\"ndvi_trim\")).\\\n", + " merge(\"all\")\n", + " recipe[\"RG_trim_after\"] = sq.collection(sq.result(\"RG\"), sq.result(\"ndvi\")).\\\n", + " merge(\"all\").\\\n", + " trim()\n", + " result = {\n", + " 'reflectance_s2_band03': 8760,\n", + " 'reflectance_s2_band04': 11016,\n", + " 'reflectance_s2_band08': 6600\n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19k\",\n", 
+ " \"desc\": \"real-world recipes\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 19l\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"red\"] = sq.entity(\"red_band\").\\\n", + " filter_time(\"year\", \"equal\", 2019).\\\n", + " filter_time(\"month\", \"greater\", 6).\\\n", + " filter_space(\"feature\", \"less\", 3).\\\n", + " groupby_space(\"feature\").\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " evaluate(\"greater\", 50).\\\n", + " concatenate(\"feature\")\n", + " recipe[\"green\"] = sq.entity(\"green_band\").\\\n", + " filter_time(\"year\", \"equal\", 2020).\\\n", + " filter_time(\"season\", \"equal\", 3).\\\n", + " groupby_space(\"feature\").\\\n", + " concatenate(\"feature\").\\\n", + " reduce(\"first\", \"time\")\n", + " recipe[\"greenred\"] = sq.collection(sq.result(\"red\"), sq.result(\"green\")).\\\n", + " trim().\\\n", + " merge(\"all\").\\\n", + " groupby_space(\"feature\").\\\n", + " shift(sq.dimensions.TIME, 1).\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " extract(\"time\").\\\n", + " evaluate(\"during\", sq.time_instant(\"2019\"))\n", + " recipe[\"ndvi_space\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"equal\", 2019).\\\n", + " filter_space(\"feature\",\"less\", 3333).\\\n", + " groupby_space(\"feature\").\\\n", + " extract(\"space\").\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " evaluate(\"less\" ,50).\\\n", + " concatenate(\"feature\")\n", + " recipe[\"ndvi_sub\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + " reduce(\"first\", \"time\").\\\n", + " filter_space(\"feature\",\"less\", 200).\\\n", + " groupby_space(\"feature\").\\\n", + " extract(\"space\").\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " evaluate(\"greater\" ,20).\\\n", + " concatenate(\"feature\")\n", + " recipe[\"ndvi_month\"] = sq.entity(\"NDVI\").\\\n", + " filter_time(\"year\", \"less\", 2020).\\\n", + 
" filter_time(\"month\", \"greater\", 5).\\\n", + " groupby_space(\"feature\").\\\n", + " extract(\"space\").\\\n", + " reduce(\"percentage\", \"space\").\\\n", + " evaluate(\"greater\" ,20).\\\n", + " concatenate(\"feature\").\\\n", + " evaluate(\"subtract\", sq.result(\"ndvi_space\")) \n", + " result = {\n", + " 'reflectance_s2_band03': 2184,\n", + " 'reflectance_s2_band04': 525960,\n", + " 'reflectance_s2_band08': 525960 \n", + " }\n", + " self.tests.append({\n", + " \"name\": \"no.19l\",\n", + " \"desc\": \"real-world recipes\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " # define test no. 19m\n", + " recipe = sq.QueryRecipe()\n", + " recipe[\"forest\"] = sq.entity(\"forest\")\n", + " recipe[\"stats\"] = (sq.entity(\"all\")\n", + " .filter(sq.entity(\"valid_obs\"))\n", + " .groupby_time(\"year\")\n", + " .reduce(\"count\", \"time\")\n", + " .concatenate(\"year\")\n", + " )\n", + " recipe[\"mask\"] = (sq.result(\"stats\")\n", + " .filter(sq.result(\"forest\"))\n", + " .reduce(\"min\", \"year\")\n", + " .evaluate(\"greater\", 5)\n", + " )\n", + " recipe[\"status_orginal\"] = (sq.entity(\"vegetation\")\n", + " .filter(sq.result(\"mask\"))\n", + " .filter(sq.entity(\"valid_obs\"))\n", + " .filter_time(\"year\", \"equal\", 2020) \n", + " .reduce(\"percentage\", \"time\")\n", + " )\n", + " recipe[\"status_post\"] = (sq.entity(\"vegetation\")\n", + " .filter(sq.result(\"mask\"))\n", + " .filter(sq.entity(\"valid_obs\"))\n", + " .groupby_time(\"year\")\n", + " .reduce(\"percentage\", \"time\")\n", + " .evaluate(\"subtract\", sq.result(\"status_orginal\"))\n", + " .concatenate(\"year\")\n", + " .filter(sq.self().extract(\"year\").evaluate(\"equal\", 2020).evaluate(\"not\"))\n", + " .trim()\n", + " )\n", + " result = {\n", + " 'appearance_colortype': self.max_items,\n", + " 'reflectance_s2_band04': 8784,\n", + " 'reflectance_s2_band08': 8784\n", + " }\n", + " self.tests.append({\n", + " \"name\": 
\"no.19m\",\n", + " \"desc\": \"real-world recipes\",\n", + " \"recipe\": recipe,\n", + " \"result\": result,\n", + " \"context\": self.context\n", + " })\n", + "\n", + " def execute(self):\n", + " for test in self.tests:\n", + " print(f\"Running test : {test['name']}\")\n", + " # run recipe with QueryProcessor\n", + " try:\n", + " qp_context = copy.deepcopy(test[\"context\"])\n", + " del qp_context[\"meta_timestamps\"]\n", + " with warnings.catch_warnings():\n", + " warnings.simplefilter(\"ignore\")\n", + " fp = QueryProcessor.parse(test['recipe'], **qp_context)\n", + " except Exception as e:\n", + " print(\"Recipe not executable with usual QueryProcessor\")\n", + " print(\"Error message: \", e)\n", + " # run recipe with FilterProcessor\n", + " with warnings.catch_warnings():\n", + " warnings.simplefilter(\"ignore\")\n", + " fp = FilterProcessor.parse(test['recipe'], **test['context'])\n", + " self.max_items = len(fp.meta_timestamps)\n", + " response = fp.optimize().execute()\n", + " # check result\n", + " try:\n", + " res_shape = {k:len(v) for k,v in response.items()}\n", + " for key, value in test[\"result\"].items():\n", + " if value is None:\n", + " test[\"result\"][key] = self.max_items\n", + " if self.ttype == \"real\":\n", + " print(f\"Resulting shape: {res_shape}\")\n", + " else:\n", + " assert res_shape == test[\"result\"]\n", + " except Exception as e:\n", + " print(\"Test failed\")\n", + " print(f\"Expected shape: {test['result']}\")\n", + " print(f\"Resulting shape: {res_shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running test : no.1\n", + "Running test : no.2a\n", + "Running test : no.2b\n", + "Running test : no.3a\n", + "Running test : no.3b\n", + "Running test : no.3c\n", + "Running test : no.3d\n", + "Running test : no.3e\n", + "Running test : no.3f\n", + "Running test : no.4a\n", + "Running test : no.4b\n", + "Running test 
: no.4c\n", + "Running test : no.5a\n", + "Running test : no.5b\n", + "Running test : no.5c\n", + "Running test : no.5d\n", + "Running test : no.6a\n", + "Running test : no.6b\n", + "Running test : no.6c\n", + "Running test : no.7a\n", + "Running test : no.7b\n", + "Running test : no.7c\n", + "Running test : no.7d\n", + "Running test : no.8a\n", + "Running test : no.8b\n", + "Running test : no.9a\n", + "Running test : no.9b\n", + "Running test : no.10a\n", + "Running test : no.10b\n", + "Running test : no.11\n", + "Running test : no.12a\n", + "Running test : no.12b\n", + "Running test : no.12c\n", + "Running test : no.13a\n", + "Running test : no.13b\n", + "Running test : no.13c\n", + "Running test : no.13d\n", + "Running test : no.13e\n", + "Running test : no.13f\n", + "Running test : no.14a\n", + "Running test : no.14b\n", + "Running test : no.14c\n", + "Running test : no.14d\n", + "Running test : no.14e\n", + "Running test : no.14f\n", + "Running test : no.14g\n", + "Running test : no.14h\n", + "Running test : no.19a\n", + "Running test : no.19b\n", + "Running test : no.19c\n", + "Running test : no.19d\n", + "Running test : no.19e\n", + "Running test : no.19f\n", + "Running test : no.19g\n", + "Running test : no.19h\n", + "Running test : no.19j\n", + "Running test : no.19k\n", + "Running test : no.19l\n", + "Running test : no.19m\n" + ] + } + ], + "source": [ + "# run all tests\n", + "tests = TestSuite(ttype=\"generated\")\n", + "tests.populate()\n", + "tests.execute()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "semantique", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.1" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/demo/figures/semantic_temp_filter_I.png 
b/demo/figures/semantic_temp_filter_I.png new file mode 100644 index 00000000..2a849d54 Binary files /dev/null and b/demo/figures/semantic_temp_filter_I.png differ diff --git a/demo/figures/semantic_temp_filter_II.png b/demo/figures/semantic_temp_filter_II.png new file mode 100644 index 00000000..fa34a82a Binary files /dev/null and b/demo/figures/semantic_temp_filter_II.png differ diff --git a/demo/processor.ipynb b/demo/processor.ipynb index 2e3112ce..37afdaff 100644 --- a/demo/processor.ipynb +++ b/demo/processor.ipynb @@ -3131,6 +3131,31 @@ "It should be noted that in our demos only data loaded from locally stored GeoTIFF files are analysed. This is sort of the worst case for demonstrating the benefits of caching since the data is stored locally and is therefore quickly accessible. Keep in mind, however, that caching is designed for and particularly beneficial in case of STACCubes when loading data over the internet." ] }, + { + "cell_type": "markdown", + "id": "6b429e49", + "metadata": {}, + "source": [ + "## Filtering data layers temporally\n", + "\n", + "The execution of the QueryProcessor via `recipe.execute()` has an optional preceding FilterProcessor which is switched on by default (`filter_check=True`). The FilterProcessor evaluates the recipe for possible temporal filter operations in order to analyse which data actually needs to be loaded for the result calculation. Without the FilterProcessor, the temporal extent of the loaded data is determined exclusively via the context parameter `time` that needs to be passed to the QueryProcessor. If the data is further subset temporally within a recipe, all data would first be loaded in order to subsequently filter it temporally. The FilterProcessor uses a semantic evaluation of the recipe to filter the metadata in advance to the extent necessary in terms of time. 
The FilterProcessor evaluates the entire recipe for temporal filter operations and keeps track of the data layers that are affected by these filter operations. If a recipe contains several results, the union of the temporal extents required for all results is finally determined in order to obtain the minimum required extent for the entire recipe evaluation. Some of the specifics of how the FilterProcessor works are illustrated in the following figures.\n", + "\n", + "* The order of filter operations within a recipe is generally irrelevant when concatenating with other verbs. This means that in both cases 1.a) and 1.b), the temporal filter is recognised and taken into account when the data is later loaded via QueryProcessor. \n", + "* An exception to the previously mentioned irrelevance of the order of verbs with regard to filter evaluation is `smooth`, `shift` and `fill`. These verbs can directly change the temporal extent of the data to be loaded by including temporally neighbouring values. However, the question of which values are temporally neighbouring cannot be answered without an actual content-based evaluation of the recipe. For example, depending on a previous semantic filter operation (e.g. according to the entity clouds), the next non-null value may be at different distances in time depending. As a universal evaluation of the temporal effect of these verbs is therefore not possible, the FilterProcessor can't analyse them or any subsequent verbs with regard to their filter effect. As shown in 2.a) & 2.b), this means for the creation of recipes that temporal filter operations must be placed before these verbs so that they are taken into account by the FilterProcessor.\n", + "\n", + "![FilterProcessor - temporal filtering, part A](figures/semantic_temp_filter_I.png)\n", + "\n", + "* According to the general functionality of some verbs, when processing several data layers (e.g. 
by merging different entities), a data layer can also be indirectly filtered by a temporal filter in the other part of the recipe. This is possible with the verbs `filter`, `groupby`, `evaluate`, `concatenate` and `merge` as shown in 3.a). There, the temporality of the data in the part of the recipe that has not yet been filtered is also filtered automatically (partly via implicit align). This only happens if the data used for filtering, grouping, evaluating, concatenating or merging still have timestamps themselves. If these no longer exist, e.g. via reduce as shown in 3.b), the implicit filter effect does not exist either. In this case, the temporal filter operation must be explicitly called in both parts of the recipe so that the entirety of all loaded data has a reduced temporal extent, which is recognised accordingly by the FilterProcessor (3.c).\n", + "\n", + "![FilterProcessor - temporal filtering, part B](figures/semantic_temp_filter_II.png)\n", + "\n", + "Some other things to be considered:\n", + "* The FilterProcessor evaluates all forms of temporal filter operations (regardless of whether the operation is called via shortcut `filter_time()` or `filter(sq.self().extract().evaluate()`).\n", + "* Per definition `filter_time` is not removing the filtered coordinates but setting them to NaN. To really remove them, a `trim` operation needs to follow `filter_time`. The FilterProcessor looks at the recipes results and treats them as if a final `trim` operation is called. This means that coordinates with NaNs caused by temporal filters are filtered out even though no explicit `trim` has been included in the recipe. If the user want's to filter the data temporally but keep the filtered data as NaNs, he/she would need to disable the FilterProcessor evaluation (`filter_check=False`). Evaluating a hypothetical `trim` at the very end of a recipe's result is a design choice to match users general behaviour not to care about NaNs. 
The fact that the `trim` is evaluated as a final operation (and not as a direct successor of filter_time calls) ensures that other operations which actually rely on the existence of the NaNs are not affected.
+ ] + }, { "cell_type": "markdown", "id": "bc13ea19", @@ -3177,9 +3202,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python [conda env:semantique]", + "display_name": "semantique", "language": "python", - "name": "conda-env-semantique-py" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -3191,7 +3216,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.1" } }, "nbformat": 4, diff --git a/semantique/datacube.py b/semantique/datacube.py index 3ffbb16e..a16dbf31 100644 --- a/semantique/datacube.py +++ b/semantique/datacube.py @@ -1,4 +1,5 @@ import numpy as np +import pandas as pd import xarray as xr import copy @@ -14,17 +15,20 @@ import rioxarray import stackstac import warnings +import zipfile from abc import abstractmethod from datacube.utils import masking from pystac_client.stac_api_io import StacApiIO from rasterio.errors import RasterioIOError +from rasterio.io import MemoryFile from shapely.geometry import box, shape from shapely.ops import transform from urllib3 import Retry from semantique import exceptions from semantique.dimensions import TIME, SPACE, X, Y +from semantique.processor import utils class Datacube(): """Base class for EO data cube configurations. @@ -105,6 +109,10 @@ class Opendatacube(Datacube): EO data cube is constructed. connection : :obj:`datacube.Datacube` Opendatacube interface object allowing to read from the data cube. + data_dict : :obj:`dict` + Dictionary with layer references as keys and dataset UUIDs to be loaded + as values. Allows to restrict the data when performing a database lookup. + If :obj:`None`, all datasets within the OpenDataCube are accessible. tz Timezone of the temporal coordinates in the EO data cube. 
Can be given as :obj:`str` referring to the name of a timezone in the tz database, or @@ -158,9 +166,17 @@ class Opendatacube(Datacube): """ - def __init__(self, layout = None, connection = None, tz = "UTC", **config): + def __init__( + self, + layout = None, + connection = None, + data_dict = None, + tz = "UTC", + **config + ): super(Opendatacube, self).__init__(layout) self.connection = connection + self.data_dict = data_dict self.tz = tz # Update default configuration parameters with provided ones. params = self._default_config @@ -179,6 +195,17 @@ def connection(self, value): assert isinstance(value, datacube.Datacube) self._connection = value + @property + def data_dict(self): + """:obj:`dict`: Dict with product Ids by the Opendatacube as values.""" + return self._data_dict + + @data_dict.setter + def data_dict(self, value): + if value is not None: + assert isinstance(value, dict) + self._data_dict = value + @property def tz(self): """:obj:`datetime.tzinfo`: Timezone of the temporal coordinates in the @@ -311,6 +338,78 @@ def retrieve(self, *reference, extent): data = data.astype("float") return data + def retrieve_metadata(self, *reference, extent): + """Retrieve metadata for a data layer from the EO data cube. + Metadata contains the timestamp & spatial bounding box, both in the + extent objects spatio-temporal reference systems (tz and crs). + + Parameters + ---------- + *reference: + The index of the data layer in the layout of the EO data cube. + extent : :obj:`xarray.DataArray` + Spatio-temporal extent in which the data should be retrieved. Should be + given as an array with a temporal dimension and two spatial dimensions, + such as returned by + :func:`parse_extent `. + The retrieved subset of the EO data cube will have the same extent. + + Returns + ------- + :obj:`pd.DataFrame` + The metadata for the data layer. + + """ + # Solve the reference by obtaining the corresponding metadata object. 
+ metadata = self.lookup(*reference) + # Check if extent is valid. + if TIME not in extent.dims: + raise exceptions.MissingDimensionError( + "Cannot retrieve data in an extent without a temporal dimension" + ) + if X not in extent.dims or Y not in extent.dims: + raise exceptions.MissingDimensionError( + "Cannot retrieve data in an extent without spatial dimensions" + ) + # Create a template for the metadata to be loaded. + names = {Y: "y", X: "x", TIME: "time"} + like = extent.sq.tz_convert(self.tz).sq.rename_dims(names).to_dataset() + # Compose metadata query. + query = datacube.api.query.Query( + product = metadata["product"], + like = like, + group_by = "solar_day" if self.config["group_by_solar_day"] else None + ) + # Execute metadata search. + ds = self.connection.find_datasets(**query.search_terms) + # Prepare lists to collect metadata items. + ids = [] + times = [] + bboxes = [] + # Create transformers for CRS. + dst_crs = pyproj.CRS(extent.rio.crs) + src_crs = [x.crs for x in ds] + src_crs_unique = [pyproj.CRS(x) for x in set(src_crs)] + src_crs_names = [str(x) for x in set(src_crs)] + transformers = [ + pyproj.Transformer.from_crs(src, dst_crs, always_xy=True) + for src in src_crs_unique + ] + transformers = {n:t for n,t in zip(src_crs_names, transformers)} + # Collect results in a loop. + for i, x in enumerate(ds): + ids.append(x.id) + # Get datetime in destination tz + dt = np.datetime64(x.center_time.replace(tzinfo=None)) + dt = utils.convert_datetime64(dt, self.tz, extent.sq.tz) + times.append(dt) + # Get bbox in destination crs + bbox = box(*transformers[x.crs].transform_bounds(*x.bounds)) + bboxes.append(bbox) + # Return metadata dataframe + df = pd.DataFrame({"id": ids, "time": times, "bbox": bboxes}) + return df + def _load(self, metadata, extent): # Check if extent is valid. if TIME not in extent.dims: @@ -330,13 +429,21 @@ def _load(self, metadata, extent): # ODC takes care of spatial transformations internally. 
names = {Y: "y", X: "x", TIME: "time"} like = extent.sq.tz_convert(self.tz).sq.rename_dims(names).to_dataset() + # Define predicate function. + def filter_id(dataset): + if self._data_dict is not None: + lyr = "_".join(metadata['reference']) + return dataset.id in self._data_dict[lyr] + else: + return True # Call ODC load function to load data as xarray dataset. data = self.connection.load( product = metadata["product"], measurements = [metadata["name"]], like = like, resampling = self.config["resamplers"][metadata["type"]], - group_by = "solar_day" if self.config["group_by_solar_day"] else None + group_by = "solar_day" if self.config["group_by_solar_day"] else None, + dataset_predicate = filter_id ) # Return as xarray dataarray. try: @@ -600,6 +707,74 @@ def retrieve(self, *reference, extent): data = data.astype("float") return data + + def retrieve_metadata(self, *reference, extent): + """Retrieve metadata for a data layer from the EO data cube. + Metadata contains the timestamp & spatial bounding box, both in the + extent objects spatio-temporal reference systems (tz and crs). + + Parameters + ---------- + *reference: + The index of the data layer in the layout of the EO data cube. + extent : :obj:`xarray.DataArray` + Spatio-temporal extent in which the data should be retrieved. Should be + given as an array with a temporal dimension and two spatial dimensions, + such as returned by + :func:`parse_extent `. + The retrieved subset of the EO data cube will have the same extent. + + Returns + ------- + :obj:`pd.DataFrame` + The metadata for the data layer. + """ + # Solve the reference by obtaining the corresponding metadata object. 
+ metadata = self.lookup(*reference) + # Check if extent is valid + if TIME not in extent.dims: + raise exceptions.MissingDimensionError( + "Cannot retrieve data in an extent without a temporal dimension" + ) + if X not in extent.dims or Y not in extent.dims: + raise exceptions.MissingDimensionError( + "Cannot retrieve data in an extent without spatial dimensions" + ) + # Initialize lists to collect metadata items + ids = [] + times = [] + bboxes = [] + # Open the zip file + with zipfile.ZipFile(self.src, 'r') as z: + # Open the specific file inside the zip using rasterio MemoryFile + with z.open(metadata["file"]) as file: + with MemoryFile(file) as memfile: + with memfile.open() as src: + # Get bounding box in destination CRS + dst_crs = pyproj.CRS(extent.rio.crs) + transformer = pyproj.Transformer.from_crs( + pyproj.CRS(src.crs), + dst_crs, + always_xy=True + ) + transformed_bounds = transformer.transform_bounds(*src.bounds) + bbox = box(*transformed_bounds) + # Subset spatially. + if box(*extent.rio.bounds()).intersects(bbox): + for i, idx in enumerate(src.indexes): + # Get datetime in destination tz + dt = np.datetime64(src.descriptions[i]) + dt = utils.convert_datetime64(dt, self.tz, extent.sq.tz) + # Subset temporally. + bounds = extent[TIME].values + if dt >= bounds[0] and dt <= bounds[1]: + ids.append(idx) + times.append(dt) + bboxes.append(bbox) + # Return metadata dataframe + df = pd.DataFrame({"id": ids, "time": times, "bbox": bboxes}) + return df + def _load(self, metadata, extent): # Check if extent is valid. if TIME not in extent.dims: @@ -766,7 +941,7 @@ def src(self, value): if value is not None: assert np.all([isinstance(x, pystac.item.Item) for x in value]) self._src = value - + @property def _default_config(self): return { @@ -880,6 +1055,96 @@ def retrieve(self, *reference, extent): data = data.sq.trim() return data + def retrieve_metadata(self, *reference, extent): + """Retrieve metadata for a data layer from the EO data cube. 
+ Metadata contains the timestamp & spatial bounding box, both in the + extent objects spatio-temporal reference systems (tz and crs). + + Parameters + ---------- + *reference: + The index of the data layer in the layout of the EO data cube. + extent : :obj:`xarray.DataArray` + Spatio-temporal extent in which the data should be retrieved. Should be + given as an array with a temporal dimension and two spatial dimensions, + such as returned by + :func:`parse_extent `. + The retrieved subset of the EO data cube will have the same extent. + + Returns + ------- + :obj:`pd.DataFrame` + The metadata for the data layer. + """ + # Solve the reference by obtaining the corresponding metadata object. + metadata = self.lookup(*reference) + # Check if extent is valid + if TIME not in extent.dims: + raise exceptions.MissingDimensionError( + "Cannot retrieve data in an extent without a temporal dimension" + ) + if X not in extent.dims or Y not in extent.dims: + raise exceptions.MissingDimensionError( + "Cannot retrieve data in an extent without spatial dimensions" + ) + # Subset temporally and spatially + if "spatial_feats" in extent.coords: + extent = extent.drop_vars("spatial_feats") + epsg = int(str(extent.rio.crs)[5:]) + t_bounds = extent.sq.tz_convert(self.tz).time.values + item_coll = STACCube._filter_spatio_temporal( + self.src, + extent.rio.bounds(), + epsg, + t_bounds[0], + t_bounds[1] + ) + # Subset by layer key + filtered_items = [] + for item in item_coll: + for asset_name, asset in item.assets.items(): + if asset_name == metadata["name"]: + if 'semantique:key' in asset.extra_fields: + asset_key = asset.extra_fields['semantique:key'] + if "_".join(asset_key) == "_".join(reference): + keep = True + break + else: + keep = False + else: + if any([ + 'semantique:key' in x.extra_fields + for x in item.assets.values() + ]): + keep = False + else: + keep = True + break + else: + keep = False + if keep: + filtered_items.append(item) + item_coll = filtered_items + # STAC Item 
ID is not globally unique (only within a collection) + coll_ids = [x.get_collection().id for x in item_coll] + item_ids = [x.id for x in item_coll] + ids = [(id1, id2) for id1, id2 in zip(coll_ids, item_ids)] + # Retrieve timestamps in destination tz + times = [x.get_datetime().replace(tzinfo=None) for x in item_coll] + times = [utils.convert_datetime64(x, self.tz, extent.sq.tz) for x in times] + # Retrieve bounding boxes in destination CRS + dst_crs = pyproj.CRS(extent.rio.crs) + transformer = pyproj.Transformer.from_crs( + pyproj.CRS("EPSG:4326"), + dst_crs, + always_xy=True + ) + bboxes = [transformer.transform_bounds(*x.bbox) for x in item_coll] + bboxes = [box(*x) for x in bboxes] + # Return metadata dataframe + df = pd.DataFrame({"id": ids, "time": times, "bbox": bboxes}) + return df + def _load(self, metadata, extent): # check if extent is valid if TIME not in extent.dims: @@ -908,7 +1173,7 @@ def _load(self, metadata, extent): if "spatial_feats" in extent.coords: extent = extent.drop_vars("spatial_feats") t_bounds = extent.sq.tz_convert(self.tz).time.values - item_coll = STACCube.filter_spatio_temporal( + item_coll = STACCube._filter_spatio_temporal( self.src, extent.rio.bounds(), epsg, @@ -916,12 +1181,14 @@ def _load(self, metadata, extent): t_bounds[1] ) - # subset according to layer key + # subset according to matching layer key + # semantique:key as it may be specified in the asset properties of an + # item is matched with the layer reference as given by the layout file filtered_items = [] for item in item_coll: has_no_key = True has_conformant_key = False - for asset_key, asset in item.assets.items(): + for asset in item.assets.values(): if 'semantique:key' in asset.extra_fields: has_no_key = False asset_key = asset.extra_fields['semantique:key'] @@ -1065,7 +1332,7 @@ def _divide_chunks(lst, k): return [lst[i : i + k] for i in range(0, len(lst), k)] @staticmethod - def filter_spatio_temporal(item_collection, bbox, bbox_crs, start_datetime, 
end_datetime): + def _filter_spatio_temporal(item_collection, bbox, bbox_crs, start_datetime, end_datetime): """ Filter item collection by spatio-temporal extent. @@ -1152,4 +1419,4 @@ def _sign_metadata(items): else: updated_items.extend(curr_colls[coll]["items"]) # return signed items - return pystac.ItemCollection(updated_items) \ No newline at end of file + return pystac.ItemCollection(updated_items) diff --git a/semantique/mapping.py b/semantique/mapping.py index cc93ba0c..c031131d 100644 --- a/semantique/mapping.py +++ b/semantique/mapping.py @@ -1,8 +1,8 @@ from abc import abstractmethod from semantique import exceptions -from semantique.processor.core import QueryProcessor, FakeProcessor -from semantique.processor.arrays import Collection +from semantique.processor.core import QueryProcessor, FakeProcessor, FilterProcessor +from semantique.processor.arrays import Collection, MetaCollection from semantique.processor import reducers from semantique.visualiser.visualise import show @@ -154,10 +154,16 @@ def translate(self, *reference, property = None, extent, datacube, if len(properties) == 1: out = properties[0] else: - out = Collection(properties).merge( - reducers.all_, - track_types=processor.track_types - ) + if type(processor) == FilterProcessor: + out = MetaCollection(properties).merge( + reducers.all_, + track_types=processor.track_types + ) + else: + out = Collection(properties).merge( + reducers.all_, + track_types=processor.track_types + ) else: try: property = ruleset[property] diff --git a/semantique/processor/arrays.py b/semantique/processor/arrays.py index caf703a0..3dd6d54d 100644 --- a/semantique/processor/arrays.py +++ b/semantique/processor/arrays.py @@ -18,6 +18,8 @@ @xr.register_dataarray_accessor("sq") class Array(): """Internal representation of a multi-dimensional array. + To be used in conjunction with the `semantique.processor.QueryProcessor` or + `semantique.processor.FakeProcessor`. 
This data structure is modelled as an accessor of :class:`xarray.DataArray`. Using accessors instead of the common class inheritance is recommended by the @@ -161,7 +163,7 @@ def evaluate(self, operator, y = None, track_types = True, **kwargs): """ operands = tuple([self._obj]) if y is None else tuple([self._obj, y]) - out = operator(*operands, track_types = track_types, **kwargs) + out = operator(*operands, track_types = track_types, meta = False, **kwargs) return out def extract(self, dimension, component = None, **kwargs): @@ -1468,8 +1470,1373 @@ def to_geotiff(self, file, cloud_optimized = True, compress = True, ) return file +@xr.register_dataarray_accessor("sqm") +class MetaArray(Array): + """Internal representation of a multi-dimensional meta array. + To be used in conjunction with the `semantique.processor.FilterProcessor`. + + Note: Track-type attributes are kept to have a compatible structure with the + `semantique.processor.Array` but are effectively ignored. + + This data structure is modelled as an accessor of :class:`xarray.DataArray`. + Using accessors instead of the common class inheritance is recommended by the + developers of xarray, see `here`_. In practice, this means that each method + of this class can be called as method of :obj:`xarray.DataArray` objects by + using the ``.sqm`` prefix: :: + + xarray_obj.sqm.method + + Parameters + ---------- + xarray_obj : :obj:`xarray.DataArray` + The content of the array. + + .. _here: + https://xarray.pydata.org/en/stable/internals/extending-xarray.html + + """ + + def __init__(self, xarray_obj): + super(MetaArray, self).__init__(xarray_obj) + + @property + def active(self): + """:obj:`str`: Active evaluation object.""" + try: + return self._obj.attrs["active"] + except KeyError: + return None + + @active.setter + def active(self, value): + self._obj.attrs["active"] = value + + @active.deleter + def active(self): + """:obj:`bool`: Is the current object actively monitored? 
+ This guides how the object is evaluated in some operations + Active objects should be those that contain data from the + current `watch_layer` (processor.core.FilterProcessor). + """ + try: + del self._obj.attrs["active"] + except KeyError: + pass + + @property + def locked(self): + """:obj:`bool`: Is the evaluation of filter operations restricted? + If locked, filter operations will be ignored.""" + try: + return self._obj.attrs["locked"] + except KeyError: + return None + + @locked.setter + def locked(self, value): + self._obj.attrs["locked"] = value + + @locked.deleter + def locked(self): + try: + del self._obj.attrs["locked"] + except KeyError: + pass + + @property + def vault(self): + """:obj:`list`: Tresor which can be used to store timestamps + if the time dimension in the array itself has been reduced away.""" + try: + return self._obj.attrs["vault"] + except KeyError: + return None + + @vault.setter + def vault(self, value): + self._obj.attrs["vault"] = value + + @vault.deleter + def vault(self): + try: + del self._obj.attrs["vault"] + except KeyError: + pass + + @property + def value_type(self): + """:obj:`str`: The value type of the array.""" + try: + return self._obj.attrs["value_type"] + except KeyError: + return None + + @value_type.setter + def value_type(self, value): + self._obj.attrs["value_type"] = value + + @value_type.deleter + def value_type(self): + try: + del self._obj.attrs["value_type"] + except KeyError: + pass + + # + # VERBS + # + + def evaluate(self, operator, y = None, track_types = False, **kwargs): + """Apply the evaluate verb to the array. + + The evaluate verb evaluates an expression for each pixel in an array. + + Parameters + ---------- + operator : :obj:`callable` + Operator function to be used in the expression. + y : optional + Right-hand side of the expression. May be a constant, meaning that the + same value is used in each expression. May also be another array + which can be aligned to the same shape as the input array. 
In the latter + case, when evaluating the expression for a pixel in the input array the + second operand is the value of the pixel in array ``y`` that has the same + dimension coordinates. Ignored when the operator is univariate. + track_types : :obj:`bool` + Should the operator promote the value type of the output object, based + on the value type(s) of the operand(s)? + **kwargs: + Additional keyword arguments passed on to the operator function. + + Returns + -------- + :obj:`xarray.DataArray` + + """ + # once issue 54 (https://github.com/ZGIS/semantique/issues/54) is solved, + # the code can be simplified by omitting the twoway_ops differentiations + + operands = tuple([self._obj]) if y is None else tuple([self._obj, y]) + # Temporal operations should always be evaluated. + if operands[0].dims == ('time',): + out = operator(*operands, track_types = track_types, meta = True, **kwargs) + out.sqm.active = self.active + out.sqm.locked = False + out.sqm.vault = self.vault + # If y is another array, the need to evaluate depends on the operator and + # the active status of the array. + elif y is not None and hasattr(y, "sqm"): + # Currently only some operators are operating in two ways. + # Two-way => Filtering (= passing of NaNs) regardless if y is active. 
+ twoway_ops = [ + "add_", + "subtract_", + "multiply_", + "divide_", + "power_", + "normalized_difference_" + ] + if y.sqm.active: + if self.active or operator.__name__ in twoway_ops: + out = operator(*operands, track_types = track_types, meta = True, **kwargs) + out.sqm.vault = self._merge_arrays_vaults(list(operands)) + else: + out = y + out.sqm.vault = y.sqm.vault + out.sqm.active = True + out.sqm.locked = True if any([self.locked, y.sqm.locked]) else False + else: + if operator.__name__ in twoway_ops: + out = operator(*operands, track_types = track_types, meta = True, **kwargs) + if self.active: + out.sqm.active = True + else: + out.sqm.active = False + out.sqm.locked = True if any([self.locked, y.sqm.locked]) else False + out.sqm.vault = self._merge_arrays_vaults(list(operands)) + else: + out = self._obj + # Only NaN values should be passed through the filter. + # Others are result of content-based ops and should be reset to 1. + out.values = np.where(~np.isnan(out.values), 1, np.nan) + else: + # Missing ops invert the filter result and should be evaluated. + if operator.__name__ in ["is_missing_", "not_missing_"]: + out = operator(*operands, track_types = track_types, meta = True, **kwargs) + out.sqm.active = self.active + out.sqm.locked = self.locked + out.sqm.vault = self.vault + else: + out = self._obj + return out + + + def extract(self, dimension, component = None, **kwargs): + """Apply the extract verb to the array. + + The extract verb extracts coordinate labels of a dimension as a new + array. + + Parameters + ----------- + dimension : :obj:`str` + Name of the dimension to be extracted. + component : :obj:`str`, optional + Name of a specific component of the dimension coordinates to be + extracted, e.g. *year*, *month* or *day* for temporal dimension + coordinates. + **kwargs: + Ignored. 
+ + Returns + -------- + :obj:`xarray.DataArray` + + Raises + ------- + :obj:`exceptions.UnknownDimensionError` + If a dimension with the given name is not present in the array. + :obj:`exceptions.UnknownComponentError` + If the given dimension does not contain the given component. + + """ + # Get array. + obj = self._obj + # Extract spatial or temporal dimension(s). + if dimension == TIME: + out = self._extract_time(obj, component) + out.sqm.active = self.active + out.sqm.locked = self.locked + out.sqm.vault = self.vault + return out + if dimension == SPACE: + out = self._extract_space(obj, component) + out.sqm.active = self.active + out.sqm.locked = self.locked + out.sqm.vault = self.vault + return out + # Extract any other dimension. + try: + out = obj[dimension] + except KeyError: + raise exceptions.UnknownDimensionError( + f"Dimension '{dimension}' is not present in the array" + ) + if component is not None: + try: + out = out[component] + except KeyError: + raise exceptions.UnknownComponentError( + f"Component '{component}' is not defined for dimension '{dimension}'" + ) + out.sqm.active = self.active + out.sqm.locked = self.locked + out.sqm.vault = self.vault + return out + + @staticmethod + def _extract_space(obj, component = None): + if component is None: + try: + out = obj.sqm.stack_spatial_dims()[SPACE] + except KeyError: + raise exceptions.UnknownDimensionError( + f"Spatial dimensions '{X}' and '{Y}' are not present in the array" + ) + out._variable = out._variable.to_base_variable() + out = out.sqm.unstack_spatial_dims() + out.sqm.value_type = "coords" + else: + # Component FEATURE should extract spatial feature indices. 
+ if component == components.space.FEATURE: + cname = "spatial_feats" + else: + cname = component + try: + out = obj[cname] + except KeyError: + raise exceptions.UnknownComponentError( + f"Component '{cname}' is not defined for dimension '{SPACE}'" + ) + return out + + @staticmethod + def _extract_time(obj, component = None): + try: + out = obj[TIME] + except KeyError: + raise exceptions.UnknownDimensionError( + f"Dimension '{TIME}' is not present in the array" + ) + if component is not None: + try: + out = out[component] + except KeyError: + aliases = { + "day_of_week": "dayofweek", + "day_of_year": "dayofyear" + } + try: + component = aliases[component] + except KeyError: + pass + try: + out = getattr(out.dt, component) + except AttributeError: + raise exceptions.UnknownComponentError( + f"Component '{component}' is not defined for dimension '{TIME}'" + ) + else: + out = utils.parse_datetime_component(component, out) + return out + + def filter(self, filterer, track_types = False, **kwargs): + """Apply the filter verb to the array. + + The filter verb filters the values in an array. + + Parameters + ----------- + filterer : :obj:`xarray.DataArray` + Binary array which can be aligned to the same shape as the input array. + Each pixel in the input array will be kept if the pixel in the filterer + with the same dimension coordinates is true, and dropped otherwise + (i.e. assigned a nodata value). + track_types : :obj:`bool` + Should it be checked that the filterer has value type *binary*? + **kwargs: + Ignored. + + Returns + -------- + :obj:`xarray.DataArray` + + """ + # Apply filter only if not locked. + if any([self.locked, filterer.sqm.locked]): + out = self._obj + else: + # Align/Broadcast + filterer = filterer.sqm.align_with(self._obj) + # Xarray treats null values as True but they should not pass the filter. + # Note: This also applies to the all NaN restored due to align_with. + # Consequence: Null as zero called after align_with. 
+ filterer.values = utils.null_as_zero(filterer) + out = self._obj.where(filterer) + out.sqm.active = any([self.active, filterer.sqm.active]) + out.sqm.locked = False + out.sqm.vault = self._merge_arrays_vaults([self._obj, filterer]) + return out + + def assign(self, y, at = None, track_types = False, **kwargs): + """Apply the assign verb to the array. This method is structured the same + way as it is structured for Array objects but effectively muted, i.e. the + current object is simply handed back. + + Parameters + ---------- + y : + Value(s) to be assigned. May be a constant, meaning that the same value + is assigned to every pixel. May also be another array which can be + aligned to the same shape as the input array. In the latter case, the + value assigned to a pixel in the input array is the value of the pixel in + array ``y`` that has the same dimension coordinates. + at : :obj:`xarray.DataArray`, optional + Binary array which can be aligned to the same shape as the input array. + To be used for conditional assignment, in which a pixel in the input will + only be assigned a new value if the if the pixel in ``at`` with the same + dimension coordinates is true. + track_types : :obj:`bool` + Should the value type of the output object be promoted, and should it be + checked that ``at`` has value type *binary*? + **kwargs: + Ignored. + + Returns + -------- + :obj:`xarray.DataArray` + + """ + out = self._obj + return out + + def groupby(self, grouper, labels_as_names = True, **kwargs): + """Apply the groupby verb to the array. + + The groupby verb groups the values in an array. + + Parameters + ----------- + grouper : :obj:`xarray.DataArray` or :obj:`Collection` + Array which can be aligned to the same shape as the input array. Pixels + in the input array that have equal values in the grouper will be + grouped together. Alternatively, it may be a collection of such arrays. 
+ Then, pixels in the input array that have equal values in all of the + grouper arrays will be grouped together. + labels_as_names : :obj:`bool` + If value labels are defined, should they be used as group names instead + of the values themselves? + **kwargs: + Ignored. + + Returns + -------- + :obj:`Collection` + + Raises + ------- + :obj:`exceptions.MissingDimensionError` + If the grouper is zero-dimensional. + :obj:`exceptions.UnknownDimensionError` + If the grouper contains dimensions that are not present in the input. + :obj:`exceptions.MixedDimensionsError` + If the grouper is a collection and its elements don't all have the same + dimensions. + + """ + # Get dimensions of the input. + obj = self._obj + odims = obj.dims + # Get dimensions of the grouper(s). + if isinstance(grouper, list): + is_list = True + gdims = [x.dims for x in grouper] + if not all([x == gdims[0] for x in gdims]): + raise exceptions.MixedDimensionsError( + "Dimensions of grouper arrays do not match" + ) + else: + is_list = False + gdims = [grouper.dims] + grouper = [grouper] + # Parse grouper. + # When grouper is multi-dimensional, dimensions should be stacked. 
+ if len(gdims[0]) == 0: + raise exceptions.MissingDimensionError( + "Cannot group with a zero-dimensional grouper" + ) + elif len(gdims[0]) == 1: + is_spatial = False + is_multidim = False + if not gdims[0][0] in odims: + raise exceptions.UnknownDimensionError( + f"Grouper dimension '{gdims[0][0]}' is not present in the array" + ) + elif len(gdims[0]) == 2 and X in gdims[0] and Y in gdims[0]: + is_spatial = True + is_multidim = False + grouper = [x.sqm.stack_spatial_dims() for x in grouper] + try: + obj = obj.sqm.stack_spatial_dims() + except KeyError: + raise exceptions.UnknownDimensionError( + f"Spatial dimensions '{X}' and '{Y}' are not present in the array" + ) + else: + is_spatial = False + is_multidim = True + if not all(x in odims for x in gdims[0]): + raise exceptions.UnknownDimensionError( + "Not all grouper dimensions are present in the array" + ) + grouper = [x.sqm.align_with(obj).sqm.stack_all_dims() for x in grouper] + obj = obj.sqm.stack_all_dims() + # Split input into groups based on unique grouper values. + if is_list: + idx = pd.MultiIndex.from_arrays([x.data for x in grouper]) + dim = grouper[0].dims + partition = list(obj.groupby(xr.IndexVariable(dim, idx), squeeze=False)) + # Use value labels as group names if defined. + if labels_as_names: + labs = [x.sqm.value_labels for x in grouper] + names = [i[0] for i in partition] + for i, x in enumerate(labs): + if x is None: + pass + else: + for j, y in enumerate(names): + y = list(y) + y[i] = x[y[i]] + names[j] = tuple(y) + groups = [i[1].rename(j) for i, j in zip(partition, names)] + else: + groups = [i[1].rename(i[0]) for i in partition] + else: + partition = list(obj.groupby(grouper[0], squeeze=False)) + # Use value labels as group names if defined. 
+ if labels_as_names: + labs = grouper[0].sqm.value_labels + if labs is not None: + groups = [i[1].rename(labs[i[0]]) for i in partition] + else: + groups = [i[1].rename(i[0]) for i in partition] + else: + groups = [i[1].rename(i[0]) for i in partition] + # Post-process. + # Stacked arrays must be unstacked again. + if is_spatial: + groups = [x.sqm.unstack_spatial_dims() for x in groups] + elif is_multidim: + # Multi-dimensional grouping may create irregular spatial dimensions. + # Therefore besides unstacking we also need to regularize the arrays. + groups = [x.sqm.unstack_all_dims().sqm.regularize() for x in groups] + # Stacking messes up the spatial feature indices coordinate. + # We need to re-create this coordinate for each group array. + if "spatial_feats" in self._obj.coords: + def fix(x, y): + x["spatial_feats"] = y["spatial_feats"].reindex_like(x) + return x + groups = [fix(x, self._obj) for x in groups] + # Collect and return. + out = MetaCollection(groups) + out.active = True if any([x.sqm.active for x in groups]) else False + out.locked = True if any([x.sqm.locked for x in groups]) else False + out.vault = self._merge_arrays_vaults(groups) + return out + + def reduce( + self, + reducer, + dimension = None, + track_types = False, + keep_attrs = True, + **kwargs + ): + """Apply the reduce verb to the array. + + The reduce verb reduces the dimensionality of an array. + + Parameters + ----------- + reducer : :obj:`callable` + The reducer function to be applied. + dimension : :obj:`str` + Name of the dimension to apply the reduction function to. If + :obj:`None`, all dimensions are reduced. + track_types : :obj:`bool` + Should the reducer promote the value type of the output object, based + on the value type of the input object? + keep_attrs: :obj:`bool` + Should the variable's attributes (attrs) be copied from the + original object to the new one? + **kwargs: + Additional keyword arguments passed on to the reducer function. 
These + should not include a keyword argument "dim", which is reserved for + specifying the dimension to reduce over. + + Returns + -------- + :obj:`xarray.DataArray` + + Raises + ------ + :obj:`exceptions.UnknownDimensionError` + If a dimension with the given name is not present in the array. + + """ + # Get array and set reduction dimension. + obj = self._obj + if dimension is not None: + if dimension == SPACE: + if X not in obj.dims or Y not in obj.dims: + raise exceptions.UnknownDimensionError( + f"Spatial dimensions '{X}' and '{Y}' are not present in the array" + ) + obj = self.stack_spatial_dims() + else: + if dimension not in obj.dims: + raise exceptions.UnknownDimensionError( + f"Dimension '{dimension}' is not present in the array" + ) + kwargs["dim"] = dimension + # If reduce-over-time persist temporal indices in vault. + # Rationale: temporal indices are lost when reducing over time but + # are needed as part of the response object. + if ( + self.active and + (dimension == TIME or (dimension is None and TIME in obj.dims)) + ): + # Reduce additional dimensions first. + if obj.ndim > 3: + dim = [x for x in obj.dims if x not in [TIME, X, Y]] + assert len(dim) == 1, "Only one dimension can be reduced" + obj_to_save = obj.sqm.reduce( + reducers.any_, + dim[0], + track_types = track_types + ) + else: + obj_to_save = obj + # Extract valid temporal indices & set as attrs. + reduce_dims = [x for x in obj_to_save.dims if x != TIME] + t_idxs = obj_to_save.time[obj_to_save.isnull().sum(reduce_dims) == 0] + obj.sqm.vault = t_idxs + # Reduce. + out = reducer( + obj, + track_types = track_types, + keep_attrs = keep_attrs, + **kwargs + ) + return out + + def shift(self, dimension, steps, **kwargs): + """Apply the shift verb to the array. This method is structured the same way + as it is structured for Array objects but effectively muted, + i.e. the current object is simply handed back. 
+ + The shift verb shifts the values in an array a given amount of steps along + a dimension. + + Parameters + ----------- + dimension : :obj:`str` + Name of the dimension to shift along. + steps : :obj:`int` + Amount of steps each value should be shifted. A negative integer will + result in a shift to the left, while a positive integer will result in + a shift to the right. A shift along the spatial dimension follows the + pixel order defined by the CRS, e.g. starting in the top-left and + moving down each column. + **kwargs: + Ignored. + + Returns + -------- + :obj:`xarray.DataArray` + + """ + out = self._obj + out.sqm.locked = True + return out + + def smooth(self, reducer, dimension, size, limit = 2, fill = False, + track_types = False, **kwargs): + """Apply the smooth verb to the array. This method is structured the same way + as it is structured for Array objects but effectively muted, + i.e. the current object is simply handed back. + + The smooth verb smoothes the values in an array by applying a reducer + function to a rolling window along a dimension. + + Parameters + ----------- + reducer : :obj:`callable` + The reducer function to be applied to the rolling window. + dimension : :obj:`str` + Name of the dimension to smooth along. + size : :obj:`int` + Size k defining the extent of the rolling window. The pixel being + smoothed will always be in the center of the window, with k pixels at + its left and k pixels at its right. If the dimension to smooth over is + the spatial dimension, the size will be used for both the X and Y + dimension, forming a sqmuare window with the smoothed pixel in the + middle. + limit : :obj:`int` + Minimum number of valid data values inside a window. If the window + contains less than this number of data values (excluding nodata) the + smoothed value will be nodata. + fill : :obj:`bool` + Should pixels with a nodata value also be smoothed? 
+ track_types : :obj:`bool` + Should the reducer promote the value type of the output object, based + on the value type of the input object? + **kwargs: + Additional keyword arguments passed on to the reducer function. These + should not include a keyword argument "dim", which is reserved for + specifying the dimension to reduce over. + + Returns + -------- + :obj:`xarray.DataArray` + + """ + out = self._obj + out.sqm.locked = True + return out + + def trim(self, dimension = None, **kwargs): + """Apply the trim verb to the array. + + The trim verb trims the dimensions of an array, meaning that all dimension + coordinates for which all values are missing are removed from the array. + The spatial dimensions are only trimmed at their edges, to preserve their + regularity. + + Parameters + ---------- + dimension : :obj:`str` + Name of the dimension to be trimmed. If :obj:`None`, all dimensions + will be trimmed. + + Returns + ------- + :obj:`xarray.DataArray` + + Raises + ------ + :obj:`exceptions.UnknownDimensionError` + If a dimension with the given name is not present in the array. + + """ + obj = self._obj + dims = obj.dims + if dimension is None: + if X in dims and Y in dims: + regular_dims = [d for d in dims if d not in [X, Y]] + out = self._trim_space(self._trim(obj, regular_dims)) + else: + out = self._trim(obj, dims) + else: + if dimension == SPACE: + if X not in dims or Y not in dims: + raise exceptions.UnknownDimensionError( + f"Spatial dimensions '{X}' and '{Y}' are not present in the array" + ) + out = self._trim_space(obj) + else: + if dimension not in obj.dims: + raise exceptions.UnknownDimensionError( + f"Dimension '{dimension}' is not present in the array" + ) + out = self._trim(obj, [dimension]) + return out + + def delineate(self, track_types = False, **kwargs): + """Apply the delineate verb to the array. This method is structured the same way + as it is structured for Array objects but effectively muted, + i.e. 
the current object is simply handed back. + + The delineate verb deliniates spatio-temporal objects in a binary array. + + Parameters + ----------- + track_types : :obj:`bool` + Should the value type of the output object be promoted, and should it be + checked that the input has value type *binary*? + **kwargs: + Ignored. + + Returns + -------- + :obj:`xarray.DataArray` + + """ + return self._obj + + def fill(self, dimension, method, extrapolate = True, track_types = False, + **kwargs): + """Apply the fill verb to the array. This method is structured the same way + as it is structured for Array objects but effectively muted, + i.e. the current object is simply handed back. + + The fill verb fills nodata values by interpolating valid data values. + + Parameters + ----------- + dimension : :obj:`str` + Name of the dimension along which to interpolate. + method : :obj:`str` + Interpolation method to use. One of nearest, linear or cubic. When + interpolation along the stacked space dimensions, the two-dimensional + versions of these interpolation methods are used, i.e. 2D nearest + neighbour, bilinear and bicubic. + extrapolate : :obj:`bool` + Should nodata values at the edge be extrapolated? Only applied to + one-dimensional interpolation. + track_types : :obj:`bool` + Should the value type(s) of the input(s) be checked, and the value + type of the output be promoted, whenever applicable? + **kwargs" + Additional keyword arguments passed on to the interpolation function. + When interpolating along a single dimension, the interpolation function + is :meth:`xarray.DataArray.interpolate_na`. + When interpolation along the stacked space dimension, the interpolation + funtion is :meth:`rioxarray.raster_array.RasterArray.interpolate_na`. + + Returns + -------- + :obj:`xarray.DataArray` + + """ + out = self._obj + out.sqm.locked = True + return out + + def name(self, value, **kwargs): + """Apply the name verb to the array. + + The name verb assigns a name to an array. 
+ + Parameters + ----------- + value : :obj:`str` + Character sting to be assigned as name to the input array. + **kwargs: + Ignored. + + Returns + -------- + :obj:`xarray.DataArray` + + """ + out = self._obj.rename(value) + return out + + def apply_custom(self, verb, track_types = False, **kwargs): + """Apply a user-defined verb to the array. This method is structured the + same way as it is structured for Array objects but effectively muted, + i.e. the current object is simply handed back. + + Parameters + ----------- + verb : :obj:`callable` + Implementation of the custom verb which will be provided to + :meth:`xarray.DataArray.pipe`. + track_types : :obj:`bool` + Should the value type(s) of the input(s) be checked, and the value + type of the output be promoted, whenever applicable? + **kwargs: + Additional keyword arguments passed on to the verb function. + + Returns + -------- + :obj:`xarray.DataArray` + + """ + out = self._obj + return out + + def align_with(self, other): + """Align the array to the shape of another array. + + An input array is alinged to another array if the pixel at position *i* in + the input array has the same coordinates as the pixel at position *i* in the + other array. Aligning can be done in several ways: + + * Consider the case where the input array has exactly the same dimensions + and coordinates as the other array, but the order of them is different. + In that case, the input array is simply re-ordered to match the other + array. + + * Consider the case where the input array has the same dimensions as the + other array, but not all coordinates match. In that case, the coordinates + that are in the input array but not in the other array are removed from the + input array, and at the same time the coordinates that are in the other + array but not in the input array are added to the input array, with nodata + values assigned. 
+ + * Consider the case where all dimensions of the input array are also present + in the other array, but not all dimensions of the other array are present + in the input array. In that case, the pixels of the input array are + duplicated along those dimensions that are missing. + + Alignment may also be a combination of more than one of these ways. + + Parameters + ----------- + other : :obj:`xarray.DataArray` + Array to which the input array should be aligned. + + Returns + -------- + :obj:`xarray.DataArray` + The aligned input array. + + Raises + ------- + :obj:`exceptions.AlignmentError` + If the input array cannot be aligned to the other array, for example when + the two arrays have no dimensions in common at all, or when the input + array has dimensions that are not present in the other array. + + """ + # Reintroduce time dimension if it was removed. + if ( + TIME in other.dims and + TIME not in self._obj.dims and + self._obj.sqm.vault is not None + ): + self._obj = self._obj.expand_dims({'time': self._obj.sqm.vault}) + # Carry out alignment. + out = xr.align(other, self._obj, join = "left")[1].broadcast_like(other) + if not out.shape == other.shape: + raise exceptions.AlignmentError( + f"Array '{other.name if other.name is not None else 'y'}' " + f"cannot be aligned with " + f"input array '{self._obj.name if self._obj.name is not None else 'x'}'" + ) + return out + + def _merge_arrays_vaults(self, arrays): + """Merge vaults of arrays by taking the union of their elements.""" + if any([x.sqm.active and x.sqm.vault is not None for x in arrays]): + vaults = [ + x.sqm.vault for x in arrays if (x.sqm.active and x.sqm.vault is not None) + ] + vault = np.unique(np.concatenate(vaults)) + out = xr.DataArray(vault, dims=TIME, name=TIME) + else: + out = None + return out + + class Collection(list): """Internal representation of a collection of multiple arrays. 
+ To be used in conjunction with the `semantique.processor.QueryProcessor` or + `semantique.processor.FakeProcessor`. + + Parameters + ---------- + list_obj : :obj:`list` of :obj:`xarray.DataArray` + The elements of the collection stored in a list. + + """ + + def __init__(self, list_obj): + super(Collection, self).__init__(list_obj) + + @property + def sq(self): + """self: Semantique accessor. + + This is merely provided to ensure compatible behaviour with + :obj:`Array ` objects, which are + modelled as an accessor to :obj:`xarray.DataArray` objects. It allows to + call all other properties and methods through the prefix ``.sq``. + + """ + return self + + @property + def is_empty(self): + """:obj:`bool`: Are all elements of the collection empty arrays.""" + return all([x.sq.is_empty for x in self]) + + def compose(self, track_types = True, **kwargs): + """Apply the compose verb to the collection. + + The compose verb creates a categorical composition from the arrays in the + collection. + + Parameters + ----------- + track_types : :obj:`bool` + Should it be checked if all arrays in the collection have value type + *binary*? + **kwargs: + Ignored. + + Returns + -------- + :obj:`xarray.DataArray` + + Raises + ------ + :obj:`exceptions.InvalidValueTypeError` + If ``track_types = True`` and the value type of at least one of the + arrays in the collection is not *binary*. 
+ + """ + if track_types: + value_types = [x.sq.value_type for x in self] + if not all([x is None or x == "binary" for x in value_types]): + raise exceptions.InvalidValueTypeError( + f"Element value types for 'compose' should all be 'binary', " + f"not {np.unique(value_types).tolist()} " + ) + def index_(idx, obj): + return xr.where(obj, idx + 1, np.nan).where(obj.notnull()) + enumerated = enumerate(self) + indexed = [index_(i, x) for i, x in enumerated] + out = indexed[0] + for x in indexed[1:]: + out = out.combine_first(x) + labels = [x.name for x in self] + idxs = range(1, len(labels) + 1) + out.sq.value_type = "nominal" + out.sq.value_labels = {k:v for k, v in zip(idxs, labels)} + return out + + def concatenate(self, dimension, track_types = True, + vtype = "nominal", **kwargs): + """Apply the concatenate verb to the collection. + + The concatenate verb concatenates the arrays in the collection along a new + or existing dimension. + + Parameters + ----------- + dimension : :obj:`str` + Name of the dimension to concatenate along. To concatenate along an + existing dimension, it should be a dimension that exists in all + collection members. To concatenate along a new dimension, it should be + a dimension that does not exist in any of the collection members. + track_types : :obj:`bool` + Should it be checked if all arrays in the collection have the same value + type? + vtype : :obj:`str`: + If the arrays are concatenated along a new dimension, what should the + value type of its dimension coordinates be? Valid options are + "continuous", "discrete", "nominal", "ordinal" and "binary". + **kwargs: + Ignored. + + Returns + -------- + :obj:`xarray.DataArray` + + Raises + ------ + :obj:`exceptions.InvalidValueTypeError` + If ``track_types = True`` and the value types of the arrays in the + collection are not all equal to each other. + + :obj:`exceptions.MissingDimensionError` + If the dimension to concatenate along exists in some but not all + arrays in the collection. 
+ + :obj:`exceptions.ReservedDimensionError` + If the new dimension to concatenate along has one of the names that + semantique reserves for the temporal dimension or spatial dimensions. + + """ + # Check value types. + if track_types: + value_types = [x.sq.value_type for x in self] + if not all([x is None or x == value_types[0] for x in value_types]): + raise exceptions.InvalidValueTypeError( + f"Element value types for 'concatenate' should all be the same, " + f"not {np.unique(value_types).tolist()} " + ) + # Concatenate. + has_dim = [dimension in x.dims for x in self] + if any(has_dim): + if all(has_dim): + # Concatenate over existing dimension. + raw = xr.concat([x for x in self], dimension) + has_duplicated_coords = any(raw.get_index(dimension).duplicated()) + if has_duplicated_coords: + # If arrays have overlapping coordinates for this dimension: + # --> Choose the first non-missing value to be in the output. + # Do this by: + # --> Creating groups for each coordinate value. + # --> Merge these groups using the "first" reducer. + # --> Concatenate the merged groups back together. + def _merge_dups(obj): + coords = obj.get_index(dimension).values + if len(coords) > 1: + dups = [obj.isel({dimension: i}) for i, x in enumerate(coords)] + return Collection(dups).sq.merge(reducers.first_) + else: + return obj + groups = list(raw.groupby(dimension, squeeze=False)) + clean = xr.concat([_merge_dups(x[1]) for x in groups], dimension) + else: + clean = raw + out = clean.sortby(dimension) + else: + raise exceptions.MissingDimensionError( + f"Concatenation dimension '{dimension}' exists in some but not all " + "arrays in the collection" + ) + else: + # Concatenate over new dimension. 
+ if dimension in [TIME, SPACE, X, Y]: + raise exceptions.ReservedDimensionError( + f"Dimension name '{dimension}' is reserved and should not be used " + "as a new dimension name" + ) + names = [x.name for x in self] + coords = pd.Index(names, name = dimension, tupleize_cols = False) + out = xr.concat([x for x in self], coords) + out[dimension].sq.value_type = vtype + out[dimension].sq.value_labels = {x:x for x in names} + # Update value labels. + if track_types: + orig_labs = [x.sq.value_labels for x in self] + if None not in orig_labs: + # If keys are duplicated first array should be prioritized. + # Therefore we first reverse the list of value label dictionaries. + orig_labs.reverse() + new_labs = {k:v for x in orig_labs for k,v in x.items()} + out.sq.value_labels = new_labs + else: + del out.sq.value_labels + else: + del out.sq.value_labels + # Return. + return out + + def merge(self, reducer, track_types = True, **kwargs): + """Apply the merge verb to the collection. + + The merge verb merges the pixel values of all arrays in the collection into + a single value per pixel. + + Parameters + ----------- + reducer : :obj:`str` + Name of the reducer function to be applied in order to reduce multiple + values per pixel into a single value. Should either be one of the + built-in reducers of semantique, or a user-defined reducer which will + be provided to the query processor when executing the query recipe. + track_types : :obj:`bool` + Should it be checked if all arrays in the collection have the same value + type, and should the reducer promote the value type of the output + object, based on the value type of the input objects? + **kwargs: + Additional keyword arguments passed on to the reducer function. + + Returns + -------- + :obj:`xarray.DataArray` + + Raises + ------ + :obj:`exceptions.InvalidValueTypeError` + If ``track_types = True`` and the value types of the arrays in the + collection are not all equal to each other. 
+ + """ + if track_types: + value_types = [x.sq.value_type for x in self] + if not all([x is None or x == value_types[0] for x in value_types]): + raise exceptions.InvalidValueTypeError( + f"Element value types for 'merge' should all be the same, " + f"not {np.unique(value_types).tolist()} " + ) + dim = "__sq__" # Temporary dimension. + concat = self.concatenate(dim, track_types = False) + out = concat.sq.reduce(reducer, dim, track_types, **kwargs) + return out + + def evaluate(self, operator, y = None, track_types = True, **kwargs): + """Apply the evaluate verb to all arrays in the collection. + + See :meth:`Array.evaluate` + + Returns + ------- + :obj:`Collection` + + """ + args = tuple([operator, y, track_types]) + out = copy.deepcopy(self) + out[:] = [x.sq.evaluate(*args, **kwargs) for x in out] + return out + + def extract(self, dimension, component = None, **kwargs): + """Apply the extract verb to all arrays in the collection. + + See :meth:`Array.extract` + + Returns + ------- + :obj:`Collection` + + """ + args = tuple([dimension, component]) + out = copy.deepcopy(self) + out[:] = [x.sq.extract(*args, **kwargs) for x in out] + return out + + def filter(self, filterer, track_types = True, **kwargs): + """Apply the filter verb to all arrays in the collection. + + See :meth:`Array.filter` + + Returns + ------- + :obj:`Collection` + + """ + args = tuple([filterer, track_types]) + out = copy.deepcopy(self) + out[:] = [x.sq.filter(*args, **kwargs) for x in out] + return out + + def assign(self, y, at = None, track_types = True, **kwargs): + """Apply the assign verb to all arrays in the collection. + + See :meth:`Array.assign` + + Returns + ------- + :obj:`Collection` + + """ + args = tuple([y, at, track_types]) + out = copy.deepcopy(self) + out[:] = [x.sq.assign(*args, **kwargs) for x in out] + return out + + def reduce(self, reducer, dimension = None, track_types = True, **kwargs): + """Apply the reduce verb to all arrays in the collection. 
+ + See :meth:`Array.reduce` + + Returns + ------- + :obj:`Collection` + + """ + args = tuple([reducer, dimension, track_types]) + out = copy.deepcopy(self) + out[:] = [x.sq.reduce(*args, **kwargs) for x in out] + return out + + def shift(self, dimension, steps, **kwargs): + """Apply the shift verb to all arrays in the collection. + + See :meth:`Array.shift` + + Returns + ------- + :obj:`Collection` + + """ + args = tuple([dimension, steps]) + out = copy.deepcopy(self) + out[:] = [x.sq.shift(*args, **kwargs) for x in out] + return out + + def smooth(self, reducer, dimension, size, limit = 2, fill = False, + track_types = True, **kwargs): + """Apply the smooth verb to all arrays in the collection. + + See :meth:`Array.smooth` + + Returns + ------- + :obj:`Collection` + + """ + args = tuple([reducer, dimension, size, limit, fill, track_types]) + out = copy.deepcopy(self) + out[:] = [x.sq.smooth(*args, **kwargs) for x in out] + return out + + def trim(self, dimension = None, **kwargs): + """Apply the trim verb to all arrays in the collection. + + See :meth:`Array.trim` + + Returns + ------- + :obj:`Collection` + + """ + out = copy.deepcopy(self) + out[:] = [x.sq.trim(dimension, **kwargs) for x in out] + return out + + def delineate(self, track_types = True, **kwargs): + """Apply the delineate verb to all arrays in the collection. + + See :meth:`Array.delineate` + + Returns + ------- + :obj:`Collection` + + """ + out = copy.deepcopy(self) + out[:] = [x.sq.delineate(track_types, **kwargs) for x in out] + return out + + def fill(self, dimension, method, track_types = True, **kwargs): + """Apply the fill verb to all arrays in the collection. + + See :meth:`Array.fill` + + Returns + ------- + :obj:`Collection` + + """ + args = tuple([dimension, method, track_types]) + out = copy.deepcopy(self) + out[:] = [x.sq.fill(*args, **kwargs) for x in out] + return out + + def name(self, value, **kwargs): + """Apply the name verb to all arrays in the collection. 
+ + See :meth:`Array.name` + + Returns + ------- + :obj:`Collection` + + """ + out = copy.deepcopy(self) + out[:] = [x.sq.name(value, **kwargs) for x in out] + return out + + def apply_custom(self, verb, track_types = True, **kwargs): + """Apply a user-defined verb to all arrays in the collection. + + See :meth:`Array.apply_custom` + + Returns + ------- + :obj:`Collection` + + """ + args = tuple([verb, track_types]) + out = copy.deepcopy(self) + out[:] = [x.sq.apply_custom(*args, **kwargs) for x in out] + return out + + def regularize(self): + """Regularize the spatial dimension of all arrays in the collection. + + See :meth:`Array.regularize` + + Returns + ------- + :obj:`Collection` + + """ + out = copy.deepcopy(self) + out[:] = [x.sq.regularize() for x in out] + return out + + def stack_spatial_dims(self): + """Stack the spatial dimensions for all arrays in the collection. + + See :meth:`Array.stack_spatial_dims` + + Returns + ------- + :obj:`Collection` + + """ + out = copy.deepcopy(self) + out[:] = [x.sq.stack_spatial_dims() for x in out] + return out + + def unstack_spatial_dims(self): + """Unstack the spatial dimensions for all arrays in the collection. + + See :meth:`Array.unstack_spatial_dims` + + Returns + ------- + :obj:`Collection` + + """ + out = copy.deepcopy(self) + out[:] = [x.sq.unstack_spatial_dims() for x in out] + return out + +class MetaCollection(Collection): + """Internal representation of a collection of multiple arrays. + To be used in conjunction with the `semantique.processor.FilterProcessor`. + + Note: Track-type attributes are kept to have a compatible structure with the + `semantique.processor.Collection` but are effectively ignored. 
Parameters ---------- @@ -1479,26 +2846,111 @@ class Collection(list): """ def __init__(self, list_obj): - super(Collection, self).__init__(list_obj) + # Only active objects should be given any weight when processing MetaCollections + # Init merges active objects & sets values of inactive obj to extent of active obj + if any([x.sqm.active for x in list_obj]): + # Create empty response obj. + tidxs = [] + for arr in list_obj: + if arr.sqm.active: + # Reduce arrays to 3 dimensions. + if arr.ndim > 3: + dim = [x for x in arr.dims if x not in [TIME, "x", "y"]] + assert len(dim) == 1, "Only one dimension can be reduced" + arr = arr.sqm.reduce(reducers.any_, dim[0], track_types=False) + # Extract temporal indices from array itself. + if TIME in arr.dims: + reduce_dims = [x for x in arr.dims if x != TIME] + tidxs.append(arr.time[arr.isnull().sum(reduce_dims) == 0]) + # Extract saved results from vault. + if arr.sqm.vault is not None: + tidxs.append(arr.sqm.vault) + # Combine results. + if len(tidxs) > 1: + tidxs = np.concatenate(tidxs) + tidxs = xr.DataArray(np.unique(tidxs), dims=TIME, name=TIME) + # Broadcast to equal time dimension. + if ( + any([TIME in x.dims for x in list_obj]) and + not all([TIME in x.dims for x in list_obj]) + ): + for i,arr in enumerate(list_obj): + if TIME not in arr.dims: + list_obj[i] = arr.expand_dims({'time': tidxs}) + # Fill inactive arrays with ones at coords where tidxs are given, else NaN. + if any([TIME in x.dims for x in list_obj]): + for x in list_obj: + if not x.sqm.active: + cond = x.time.isin(tidxs).broadcast_like(x) + x.values = xr.where(cond, xr.ones_like(x), xr.full_like(x, np.nan)) + # Create response object. + super(MetaCollection, self).__init__(list_obj) + self._active = True + self._vault = self._merge_arrays_vaults(list_obj) + else: + # Create response object. 
+ super(MetaCollection, self).__init__(list_obj) + self._active = False + self._vault = None + self._locked = True if any([x.sqm.locked for x in list_obj]) else False @property - def sq(self): - """self: Semantique accessor. - - This is merely provided to ensure compatible behaviour with - :obj:`Array ` objects, which are - modelled as an accessor to :obj:`xarray.DataArray` objects. It allows to - call all other properties and methods through the prefix ``.sq``. - - """ + def sqm(self): + """self: Semantique accessor. This is merely provided to ensure compatible + behaviour with :obj:`Array ` objects.""" return self @property def is_empty(self): """:obj:`bool`: Are all elements of the collection empty arrays.""" - return all([x.sq.is_empty for x in self]) + return all([x.sqm.is_empty for x in self]) - def compose(self, track_types = True, **kwargs): + @property + def active(self): + """:obj:`bool`: Is the current object actively monitored? + This guides how the object is evaluated in some operations + Active objects should be those that contain data from the + current `watch_layer` (processor.core.FilterProcessor). + """ + return self._active + + @active.setter + def active(self, value): + self._active = value + + @active.deleter + def active(self): + self._active = None + + @property + def locked(self): + """:obj:`bool`: Is the evaluation of filter operations restricted? 
+ If locked, filter operations will be ignored.""" + return self._locked + + @locked.setter + def locked(self, value): + self._locked = value + + @locked.deleter + def locked(self): + self._locked = None + + @property + def vault(self): + """:obj:`list`: Tresor which can be used to store timestamps + if the time dimension in the array itself has been reduced away.""" + return self._vault + + @vault.setter + def vault(self, value): + self._vault = value + + @vault.deleter + def vault(self): + self._vault = None + + def compose(self, track_types = False, **kwargs): """Apply the compose verb to the collection. The compose verb creates a categorical composition from the arrays in the @@ -1516,34 +2968,17 @@ def compose(self, track_types = True, **kwargs): -------- :obj:`xarray.DataArray` - Raises - ------ - :obj:`exceptions.InvalidValueTypeError` - If ``track_types = True`` and the value type of at least one of the - arrays in the collection is not *binary*. - """ - if track_types: - value_types = [x.sq.value_type for x in self] - if not all([x is None or x == "binary" for x in value_types]): - raise exceptions.InvalidValueTypeError( - f"Element value types for 'compose' should all be 'binary', " - f"not {np.unique(value_types).tolist()} " - ) - def index_(idx, obj): - return xr.where(obj, idx + 1, np.nan).where(obj.notnull()) - enumerated = enumerate(self) - indexed = [index_(i, x) for i, x in enumerated] - out = indexed[0] - for x in indexed[1:]: - out = out.combine_first(x) - labels = [x.name for x in self] - idxs = range(1, len(labels) + 1) - out.sq.value_type = "nominal" - out.sq.value_labels = {k:v for k, v in zip(idxs, labels)} + out = super(MetaCollection, self).compose( + track_types=track_types, + **kwargs + ) + out.sqm.active = self.active + out.sqm.locked = self.locked + out.sqm.vault = self.vault return out - def concatenate(self, dimension, track_types = True, + def concatenate(self, dimension, track_types = False, vtype = "nominal", **kwargs): """Apply 
the concatenate verb to the collection. @@ -1573,10 +3008,6 @@ def concatenate(self, dimension, track_types = True, Raises ------ - :obj:`exceptions.InvalidValueTypeError` - If ``track_types = True`` and the value types of the arrays in the - collection are not all equal to each other. - :obj:`exceptions.MissingDimensionError` If the dimension to concatenate along exists in some but not all arrays in the collection. @@ -1586,74 +3017,18 @@ def concatenate(self, dimension, track_types = True, semantique reserves for the temporal dimension or spatial dimensions. """ - # Check value types. - if track_types: - value_types = [x.sq.value_type for x in self] - if not all([x is None or x == value_types[0] for x in value_types]): - raise exceptions.InvalidValueTypeError( - f"Element value types for 'concatenate' should all be the same, " - f"not {np.unique(value_types).tolist()} " - ) - # Concatenate. - has_dim = [dimension in x.dims for x in self] - if any(has_dim): - if all(has_dim): - # Concatenate over existing dimension. - raw = xr.concat([x for x in self], dimension) - has_duplicated_coords = any(raw.get_index(dimension).duplicated()) - if has_duplicated_coords: - # If arrays have overlapping coordinates for this dimension: - # --> Choose the first non-missing value to be in the output. - # Do this by: - # --> Creating groups for each coordinate value. - # --> Merge these groups using the "first" reducer. - # --> Concatenate the merged groups back together. 
- def _merge_dups(obj): - coords = obj.get_index(dimension).values - if len(coords) > 1: - dups = [obj.isel({dimension: i}) for i, x in enumerate(coords)] - return Collection(dups).sq.merge(reducers.first_) - else: - return obj - groups = list(raw.groupby(dimension, squeeze=False)) - clean = xr.concat([_merge_dups(x[1]) for x in groups], dimension) - else: - clean = raw - out = clean.sortby(dimension) - else: - raise exceptions.MissingDimensionError( - f"Concatenation dimension '{dimension}' exists in some but not all " - "arrays in the collection" - ) - else: - # Concatenate over new dimension. - if dimension in [TIME, SPACE, X, Y]: - raise exceptions.ReservedDimensionError( - f"Dimension name '{dimension}' is reserved and should not be used " - "as a new dimension name" - ) - names = [x.name for x in self] - coords = pd.Index(names, name = dimension, tupleize_cols = False) - out = xr.concat([x for x in self], coords) - out[dimension].sq.value_type = vtype - out[dimension].sq.value_labels = {x:x for x in names} - # Update value labels. - if track_types: - orig_labs = [x.sq.value_labels for x in self] - if None not in orig_labs: - # If keys are duplicated first array should be prioritized. - # Therefore we first reverse the list of value label dictionaries. - orig_labs.reverse() - new_labs = {k:v for x in orig_labs for k,v in x.items()} - out.sq.value_labels = new_labs - else: - del out.sq.value_labels - else: - del out.sq.value_labels - # Return. + out = super(MetaCollection, self).concatenate( + dimension, + track_types=track_types, + vtype=vtype, + **kwargs + ) + out.sqm.active = self.active + out.sqm.locked = self.locked + out.sqm.vault = self.vault return out - def merge(self, reducer, track_types = True, **kwargs): + def merge(self, reducer, track_types = False, **kwargs): """Apply the merge verb to the collection. 
The merge verb merges the pixel values of all arrays in the collection into @@ -1677,26 +3052,13 @@ def merge(self, reducer, track_types = True, **kwargs): -------- :obj:`xarray.DataArray` - Raises - ------ - :obj:`exceptions.InvalidValueTypeError` - If ``track_types = True`` and the value types of the arrays in the - collection are not all equal to each other. - """ - if track_types: - value_types = [x.sq.value_type for x in self] - if not all([x is None or x == value_types[0] for x in value_types]): - raise exceptions.InvalidValueTypeError( - f"Element value types for 'merge' should all be the same, " - f"not {np.unique(value_types).tolist()} " - ) - dim = "__sq__" # Temporary dimension. - concat = self.concatenate(dim, track_types = False) - out = concat.sq.reduce(reducer, dim, track_types, **kwargs) + dim = "__sqm__" # Temporary dimension. + concat = self.concatenate(dim, track_types = track_types) + out = concat.sqm.reduce(reducer, dim, track_types, **kwargs) return out - def evaluate(self, operator, y = None, track_types = True, **kwargs): + def evaluate(self, operator, y = None, track_types = False, **kwargs): """Apply the evaluate verb to all arrays in the collection. 
See :meth:`Array.evaluate` @@ -1708,7 +3070,7 @@ def evaluate(self, operator, y = None, track_types = True, **kwargs): """ args = tuple([operator, y, track_types]) out = copy.deepcopy(self) - out[:] = [x.sq.evaluate(*args, **kwargs) for x in out] + out[:] = [x.sqm.evaluate(*args, **kwargs) for x in out] return out def extract(self, dimension, component = None, **kwargs): @@ -1723,10 +3085,10 @@ def extract(self, dimension, component = None, **kwargs): """ args = tuple([dimension, component]) out = copy.deepcopy(self) - out[:] = [x.sq.extract(*args, **kwargs) for x in out] + out[:] = [x.sqm.extract(*args, **kwargs) for x in out] return out - def filter(self, filterer, track_types = True, **kwargs): + def filter(self, filterer, track_types = False, **kwargs): """Apply the filter verb to all arrays in the collection. See :meth:`Array.filter` @@ -1738,10 +3100,10 @@ def filter(self, filterer, track_types = True, **kwargs): """ args = tuple([filterer, track_types]) out = copy.deepcopy(self) - out[:] = [x.sq.filter(*args, **kwargs) for x in out] + out[:] = [x.sqm.filter(*args, **kwargs) for x in out] return out - def assign(self, y, at = None, track_types = True, **kwargs): + def assign(self, y, at = None, track_types = False, **kwargs): """Apply the assign verb to all arrays in the collection. See :meth:`Array.assign` @@ -1753,10 +3115,10 @@ def assign(self, y, at = None, track_types = True, **kwargs): """ args = tuple([y, at, track_types]) out = copy.deepcopy(self) - out[:] = [x.sq.assign(*args, **kwargs) for x in out] + out[:] = [x.sqm.assign(*args, **kwargs) for x in out] return out - def reduce(self, reducer, dimension = None, track_types = True, **kwargs): + def reduce(self, reducer, dimension = None, track_types = False, **kwargs): """Apply the reduce verb to all arrays in the collection. 
See :meth:`Array.reduce` @@ -1768,7 +3130,8 @@ def reduce(self, reducer, dimension = None, track_types = True, **kwargs): """ args = tuple([reducer, dimension, track_types]) out = copy.deepcopy(self) - out[:] = [x.sq.reduce(*args, **kwargs) for x in out] + list_obj = [x.sqm.reduce(*args, **kwargs) for x in out] + out = MetaCollection(list_obj) return out def shift(self, dimension, steps, **kwargs): @@ -1783,11 +3146,11 @@ def shift(self, dimension, steps, **kwargs): """ args = tuple([dimension, steps]) out = copy.deepcopy(self) - out[:] = [x.sq.shift(*args, **kwargs) for x in out] + out[:] = [x.sqm.shift(*args, **kwargs) for x in out] return out def smooth(self, reducer, dimension, size, limit = 2, fill = False, - track_types = True, **kwargs): + track_types = False, **kwargs): """Apply the smooth verb to all arrays in the collection. See :meth:`Array.smooth` @@ -1799,7 +3162,7 @@ def smooth(self, reducer, dimension, size, limit = 2, fill = False, """ args = tuple([reducer, dimension, size, limit, fill, track_types]) out = copy.deepcopy(self) - out[:] = [x.sq.smooth(*args, **kwargs) for x in out] + out[:] = [x.sqm.smooth(*args, **kwargs) for x in out] return out def trim(self, dimension = None, **kwargs): @@ -1813,10 +3176,10 @@ def trim(self, dimension = None, **kwargs): """ out = copy.deepcopy(self) - out[:] = [x.sq.trim(dimension, **kwargs) for x in out] + out[:] = [x.sqm.trim(dimension, **kwargs) for x in out] return out - def delineate(self, track_types = True, **kwargs): + def delineate(self, track_types = False, **kwargs): """Apply the delineate verb to all arrays in the collection. 
See :meth:`Array.delineate` @@ -1827,10 +3190,10 @@ def delineate(self, track_types = True, **kwargs): """ out = copy.deepcopy(self) - out[:] = [x.sq.delineate(track_types, **kwargs) for x in out] + out[:] = [x.sqm.delineate(track_types, **kwargs) for x in out] return out - def fill(self, dimension, method, track_types = True, **kwargs): + def fill(self, dimension, method, track_types = False, **kwargs): """Apply the fill verb to all arrays in the collection. See :meth:`Array.fill` @@ -1842,7 +3205,7 @@ def fill(self, dimension, method, track_types = True, **kwargs): """ args = tuple([dimension, method, track_types]) out = copy.deepcopy(self) - out[:] = [x.sq.fill(*args, **kwargs) for x in out] + out[:] = [x.sqm.fill(*args, **kwargs) for x in out] return out def name(self, value, **kwargs): @@ -1856,10 +3219,10 @@ def name(self, value, **kwargs): """ out = copy.deepcopy(self) - out[:] = [x.sq.name(value, **kwargs) for x in out] + out[:] = [x.sqm.name(value, **kwargs) for x in out] return out - def apply_custom(self, verb, track_types = True, **kwargs): + def apply_custom(self, verb, track_types = False, **kwargs): """Apply a user-defined verb to all arrays in the collection. 
See :meth:`Array.apply_custom` @@ -1871,7 +3234,7 @@ def apply_custom(self, verb, track_types = True, **kwargs): """ args = tuple([verb, track_types]) out = copy.deepcopy(self) - out[:] = [x.sq.apply_custom(*args, **kwargs) for x in out] + out[:] = [x.sqm.apply_custom(*args, **kwargs) for x in out] return out def regularize(self): @@ -1885,7 +3248,7 @@ def regularize(self): """ out = copy.deepcopy(self) - out[:] = [x.sq.regularize() for x in out] + out[:] = [x.sqm.regularize() for x in out] return out def stack_spatial_dims(self): @@ -1899,7 +3262,7 @@ def stack_spatial_dims(self): """ out = copy.deepcopy(self) - out[:] = [x.sq.stack_spatial_dims() for x in out] + out[:] = [x.sqm.stack_spatial_dims() for x in out] return out def unstack_spatial_dims(self): @@ -1913,5 +3276,17 @@ def unstack_spatial_dims(self): """ out = copy.deepcopy(self) - out[:] = [x.sq.unstack_spatial_dims() for x in out] + out[:] = [x.sqm.unstack_spatial_dims() for x in out] + return out + + def _merge_arrays_vaults(self, arrays): + """Merge vaults of arrays by taking the union of their elements.""" + if any([x.sqm.active and x.sqm.vault is not None for x in arrays]): + vaults = [ + x.sqm.vault for x in arrays if (x.sqm.active and x.sqm.vault is not None) + ] + vault = np.unique(np.concatenate(vaults)) + out = xr.DataArray(vault, dims=TIME, name=TIME) + else: + out = None return out \ No newline at end of file diff --git a/semantique/processor/core.py b/semantique/processor/core.py index 10a307e5..0be56864 100644 --- a/semantique/processor/core.py +++ b/semantique/processor/core.py @@ -1,14 +1,18 @@ import geopandas as gpd import numpy as np +import pandas as pd import copy import inspect import logging +import pystac import pyproj import pytz import warnings import xarray as xr +from semantique import datacube from semantique import exceptions +from semantique.dimensions import TIME, SPACE, X, Y from semantique.processor import arrays, operators, reducers, values, utils logger = 
logging.getLogger(__name__) @@ -67,9 +71,9 @@ def __init__(self, recipe, datacube, mapping, extent, custom_verbs = None, self.custom_reducers = custom_reducers self.preview = preview if cache is None: - self.cache = Cache() + self.cache = Cache() else: - self.cache = cache + self.cache = cache @property def response(self): @@ -444,7 +448,8 @@ def handle_result(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` Raises ------ @@ -482,7 +487,8 @@ def handle_self(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ out = self._get_eval_obj() @@ -518,7 +524,8 @@ def handle_processing_chain(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ obj = self.call_handler(block["with"]) @@ -538,7 +545,8 @@ def handle_verb(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ out = self.call_handler(block, key = "name") @@ -557,7 +565,8 @@ def handle_evaluate(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ # Get function parameters. @@ -591,7 +600,8 @@ def handle_extract(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ return self.call_verb("extract", block["params"]) @@ -607,7 +617,8 @@ def handle_filter(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ # Get function parameters. @@ -630,7 +641,8 @@ def handle_assign(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ # Get function parameters. 
@@ -690,7 +702,8 @@ def handle_reduce(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ # Get function parameters. @@ -713,7 +726,8 @@ def handle_shift(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ return self.call_verb("shift", block["params"]) @@ -729,7 +743,8 @@ def handle_smooth(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ # Get function parameters. @@ -752,7 +767,8 @@ def handle_trim(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ return self.call_verb("trim", block["params"]) @@ -768,7 +784,8 @@ def handle_delineate(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ # Get and update function parameters. @@ -788,7 +805,8 @@ def handle_fill(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ # Get function parameters. @@ -809,7 +827,8 @@ def handle_name(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ return self.call_verb("name", block["params"]) @@ -825,7 +844,8 @@ def handle_apply_custom(self, block): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ # Get function parameters. @@ -1077,7 +1097,8 @@ def call_verb(self, name, params): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ # Get the object to apply the verb to. 
@@ -1266,7 +1287,8 @@ def call_verb(self, name, params): Returns ------- - :obj:`xarray.DataArray` or :obj:`Collection ` + :obj:`xarray.DataArray` or + :obj:`Collection ` """ return self._get_eval_obj() @@ -1296,8 +1318,7 @@ def handle_concept(self, block): custom_verbs = self._custom_verbs, custom_operators = self._custom_operators, custom_reducers = self._custom_reducers, - track_types = self._track_types, - + track_types = self._track_types ) logger.debug(f"Translated concept {block['reference']}:\n{out}") return out @@ -1331,6 +1352,625 @@ def handle_layer(self, block): self._cache.build(block["reference"]) return xr.full_like(self._extent, np.nan) +class FilterProcessor(QueryProcessor): + """ + Worker that processes a semantic query recipe to retrieve the required temporal + extent of all layers that are used in the recipe. + + Parameters + ---------- + recipe : QueryRecipe + The query recipe to be processed. + datacube : Datacube + The datacube instance to process the query against. + mapping : Mapping + The mapping instance to process the query against. + extent : :obj:`xarray.DataArray` + The spatio-temporal extent in which the query should be processed. Should + be given as an array with a temporal dimension and two spatial dimensions + such as returned by + :func:`parse_extent `. + custom_verbs : :obj:`dict`, optional + User-defined verbs that may be used when executing the query recipe in + addition to the built-in verbs in semantique. + custom_operators : :obj:`dict`, optional + User-defined operator functions that may be used when evaluating + expressions with the evaluate verb in addition to the built-in operators + in semantique. Built-in operators with the same name will be overwritten. + custom_reducers : :obj:`dict`, optional + User-defined reducer functions that may be used when reducing array + dimensions with the reduce verb in addition to the built-in reducers in + semantique. Built-in reducers with the same name will be overwritten. 
+  track_types : :obj:`bool`
+    Should the query processor keep track of the value type of arrays
+    when applying processes, and promote them if necessary? This option is
+    always disabled for the FilterProcessor since it doesn't evaluate processes
+    and therefore can't check the validity of the types of the arrays.
+  preview : :obj:`bool`
+    Run the query processor with reduced resolution to test the recipe execution.
+    Preview-runs are necessary if cache should be used.
+  cache : :obj:`Cache`
+    The cache object that is used to store data layers.
+  watch_layer : :obj:`str`
+    The layer that is currently being focussed on. All temporal filter operations
+    are analyzed regarding the effect they are having on this layer. Doesn't need
+    to be set by the user but will be set dynamically upon calling .execute().
+  meta_timestamps : :obj:`DatetimeIndex` or :obj:`Series`
+    The timestamps that are used as a starting point to determine the final temporal
+    extent. The timestamps will be filtered by the temporal filters. If left to `None`,
+    the timestamps will be retrieved dynamically from the referenced data layers upon
+    calling .execute(). Only for test purposes meta_timestamps should be provided by
+    the user.
+ """ + def __init__(self, recipe, datacube, mapping, extent, custom_verbs = None, + custom_operators = None, custom_reducers = None, track_types = True, + preview = False, cache = None, watch_layer = None, + meta_timestamps = None): + super(FilterProcessor, self).__init__( + recipe, datacube, mapping, extent, custom_verbs=custom_verbs, + custom_operators=custom_operators, custom_reducers=custom_reducers, + track_types=track_types, preview=preview, cache=cache + ) + self.track_types = False + self.watch_layer = watch_layer + self.meta_timestamps = meta_timestamps + + @property + def crs(self): + """:obj:`pyproj.crs.CRS`: Spatial coordinate reference system in which the + query should be processed.""" + return self._extent.sqm.crs + + @property + def spatial_resolution(self): + """:obj:`list`: Spatial resolution in which the query should be + processed.""" + return self._extent.sqm.spatial_resolution + + @property + def tz(self): + """:obj:`datetime.tzinfo`: Time zone in which the query should be + processed.""" + return self._extent.sqm.tz + + @property + def watch_layer(self): + """:obj:`str`: The layer that is currently being focussed on.""" + return self._watch_layer + + @watch_layer.setter + def watch_layer(self, value): + self._watch_layer = value + + @property + def meta_timestamps(self): + """:obj:`DatetimeIndex` or :obj:`Series`: The timestamps that are used as + a starting point to determine the final temporal extent.""" + return self._meta_timestamps + + @meta_timestamps.setter + def meta_timestamps(self, value): + self._meta_timestamps = value + + def execute(self): + """Execute a semantic query. + + During query execution, the query processor executes the result + instructions of the query recipe. It solves all references, evaluates them + into arrays, and applies the defined actions to them. 
+ + Returns + ------- + :obj:`FilterProcessor` + An updated filter processor instance, with a :attr:`response` property + containing the resulting timestamps that are processed in the recipe. + + """ + logger.info("Started query processing for temporal filter evaluation.") + + # Step 1: Run fake processor to get a dict of all layers. + logger.info("Started fake query processing to resolve layer references.") + self.fap = FakeProcessor( + recipe=self.recipe, + datacube=self.datacube, + mapping=self.mapping, + extent=self.extent, + track_types=self.track_types, + custom_verbs=self.custom_verbs, + custom_operators=self.custom_operators, + custom_reducers=self.custom_reducers, + preview=self.preview, + cache=self.cache + ) + _ = self.fap.optimize().execute() + lyrs = [list(x) for x in set(tuple(x) for x in self.fap.cache.seq)] + self._response = {"_".join(x): {} for x in lyrs} + logger.info("Finished fake query processing to resolve layer references.") + logger.debug(f"Resolved layers: {lyrs}") + + # Step 2.a: Temporal filter evaluation. + concepts = self._find_and_resolve_concepts(self.recipe) + if any([self._contains_filter(x) for x in [*concepts, self.recipe]]): + skip_filter = False + try: + # Step 2.1.1: Retrieve timestamps of data layers to be filtered. + if self.meta_timestamps is None: + meta_retrieved = True + if type(self.datacube) == datacube.Opendatacube: + # Retrieve products to look up. + lyr_prods_lut = {} + for ref in self.fap.cache.seq: + lyr_prods_lut[ref] = self.datacube.lookup(*ref)["product"] + # Retrieve timestamps of prods. + meta_dfs = [] + prods = list(set(lyr_prods_lut.values())) + for prod in prods: + ref_idx = list(lyr_prods_lut.values()).index(prod) + lyr = list(lyr_prods_lut.keys())[ref_idx] + df = self.datacube.retrieve_metadata(*lyr, extent=self._extent) + df.insert(0, "prod", prod) + meta_dfs.append(df) + _meta_df = pd.concat(meta_dfs).reset_index(drop=True) + # Expand meta df for lyr information. 
+ meta_dfs = [] + for lyr,prod in lyr_prods_lut.items(): + df = _meta_df[_meta_df['prod'] == prod].copy() + df.insert(0, "lyr", "_".join(lyr)) + meta_dfs.append(df) + logger.debug(f"Retrieved meta information for layer {lyr}:\n {df}") + logger.debug(f"Unique timestamps: {len(df.drop_duplicates(['time']))}") + meta_df = pd.concat(meta_dfs).reset_index(drop=True) + meta_df.drop(columns=["prod"], inplace=True) + elif type(self.datacube) == datacube.STACCube: + # Retrieve timestamps of references. + # Note: Contrary to ODC, .retrieve_metadata() can be called immediately + # since the metadata is already stored in the STACCube. Hence, no performance + # advantage by resolving products (=items instead of assets) first. + # What we need instead though is an LuT to map layer names back to + # the respective references. + meta_dfs = [] + lyr_ref_lut = {} + for lyr in list(set(self.fap.cache.seq)): + df = self.datacube.retrieve_metadata(*lyr, extent=self._extent) + df.insert(0, "lyr", "_".join(lyr)) + meta_dfs.append(df) + lyr_ref_lut["_".join(lyr)] = lyr + logger.debug(f"Retrieved meta information for layer {lyr}:\n {df}") + logger.debug(f"Unique timestamps: {len(df.drop_duplicates(['time']))}") + meta_df = pd.concat(meta_dfs).reset_index(drop=True) + elif type(self.datacube) == datacube.GeotiffArchive: + meta_retrieved = False + raise ValueError("FilterProcessor doesn't support GeotiffArchive.") + else: + meta_retrieved = False + raise ValueError(f"Datacube type {self.datacube} not supported.") + # Set meta timestamps to be analysed. + self.meta_timestamps = meta_df.time.unique() + else: + meta_retrieved = False + + if len(self.meta_timestamps): + # Step 2.1.2: Execute instructions for each layer & result in the recipe. 
+ for lyr in lyrs: + self.watch_layer = "_".join(lyr) + logger.info(f"Evaluate temporal filter for layer: '{lyr}'") + for x in self._recipe: + logger.info(f"Started executing result: '{x}'") + result = self.call_handler(self._recipe[x]) + result.name = x + self._response[self.watch_layer][x] = result + logger.info(f"Finished executing result: '{x}'") + + # Step 2.1.3: Postprocessing of results. + # Omit non-active results. + for lyr,arr_dict in self._response.items(): + self._response[lyr] = { + k: v for k, v in arr_dict.items() if v.sqm.active + } + # Combine temporal extents of Collections. + # Arrive at set of arrays as results. + for lyr,arr_dict in self._response.items(): + for k,v in arr_dict.items(): + if type(v.sqm).__name__ == 'MetaCollection': + # Fill datetime arrays with ones. + for i,arr in enumerate(v): + if np.issubdtype(arr.dtype, np.datetime64): + v[i] = xr.ones_like(arr, dtype="int32") + if type(v.sqm).__name__ == 'MetaCollection': + self._response[lyr][k] = v.sqm.merge( + reducers.any_, + track_types=False + ) + # Retrieve valid temporal indices per layer and result. + # Valid indices are those that are not null. + response = copy.deepcopy(self._response) + for lyr,arr_dict in self._response.items(): + for res,arr in arr_dict.items(): + # Create empty response obj. + out = [] + # Reduce arrays to at most 3 dimensions. + if arr.ndim > 3: + dim = [x for x in arr.dims if x not in ["time", "x", "y"]] + assert len(dim) == 1, "Only one dimension can be reduced" + arr = arr.sqm.reduce(reducers.any_, dim[0], track_types=False) + # Extract temporal indices from array itself. + if "time" in arr.dims: + reduce_dims = [x for x in arr.dims if x != "time"] + out.append(arr.time[arr.isnull().sum(reduce_dims) == 0]) + # Extract saved results from vault. + if arr.sqm.vault is not None: + out.append(arr.sqm.vault) + # Combine results from array and vault. 
+ if len(out): + if len(out) > 1: + out = xr.DataArray( + np.unique(np.concatenate(out)), + dims="time", + name="time" + ) + else: + out = out[0] + response[lyr][res] = out + else: + # If no valid temporal indices are found, remove result. + # Occurs if result are extracted spatial coordinates. + response[lyr].pop(res) + self._response = response + # Omit empty layer results. + self._response = {k:v for k,v in self._response.items() if v} + if len(self._response): + # Create temporal extents' union over results for each layer. + for lyr,arr_dict in self._response.items(): + time_coords = [arr_dict[key].values for key in arr_dict] + merged_time = np.unique(np.concatenate(time_coords)) + merged_time.sort() + self._response[lyr] = pd.to_datetime(merged_time) + # Sort response items. + self._response = {k: v for k, v in sorted(self._response.items())} + else: + skip_filter = True + except Exception as e: + skip_filter = True + logger.error(f"An error occurred during FilterProcessor execution: {e}") + logger.error("FilterProcessor evaluation is skipped.") + else: + skip_filter = True + meta_retrieved = False + + # Step 2.b: Shortcut if no temporal filter is present. + # Keep all initial timestamps. + if skip_filter: + for lyr in lyrs: + self._response["_".join(lyr)] = pd.to_datetime(self.meta_timestamps) + self._response = {k: v for k, v in sorted(self._response.items())} + + # Step 3: Update datacube dataset according to valid timestamps. + if meta_retrieved: + # Copy datacube to update. + if type(self.datacube) == datacube.Opendatacube: + # Copy datacube. + dc_con = self.datacube.connection + self.datacube.connection = None + _datacube = copy.deepcopy(self.datacube) + _datacube.connection = dc_con + self.datacube.connection = dc_con + # Extract valid dataset ids corresponding to timestamps. 
+ id_dict = {} + for k,v in self._response.items(): + ids = meta_df[(meta_df.lyr == k) & (meta_df.time.isin(pd.Series(v)))].id + id_dict[k] = list(ids) + logger.debug(f"Temporally filtered results for layer {k}") + logger.debug(f"- unique timestamps: {len(np.unique(v))}") + logger.debug(f"- unique datasets: {len(ids)}") + _datacube.data_dict = id_dict + self.datacube = _datacube + elif type(self.datacube) == datacube.STACCube: + # Copy datacube. + _datacube = copy.deepcopy(self.datacube) + _datacube.src = pystac.ItemCollection(self.datacube.src) + # Extract valid collection_item_Ids corresponding to timestamps. + id_df = pd.DataFrame() + for k,v in self._response.items(): + lyr_name = self.datacube.lookup(*lyr_ref_lut[k])["name"] + ids = meta_df[(meta_df.lyr == k) & (meta_df.time.isin(pd.Series(v)))].id + id_df = pd.concat([id_df, pd.DataFrame({"prod": ids, "lyr" : lyr_name})]) + logger.debug(f"Temporally filtered results for layer {k}") + logger.debug(f"- unique timestamps: {len(np.unique(v))}") + logger.debug(f"- unique items: {len(ids)}") + id_df = id_df.groupby("prod").lyr.unique().reset_index() + # Subset items & assets as input to datacube correspondingly. + if len(id_df): + filtered_items = [] + for item in _datacube.src: + item_id = (item.get_collection().id, item.id) + df_subset = id_df[id_df["prod"].isin([item_id])] + if len(df_subset): + asset_dict = {} + for asset in df_subset["lyr"].iloc[0]: + asset_dict[asset] = item.assets[asset] + item.assets = asset_dict + filtered_items.append(item) + _datacube.src = filtered_items + else: + _datacube.src = [] + self.datacube = _datacube + + # Step 4: Return result. + out = self._response + logger.info("Finished query processing for temporal filter evaluation.") + return out + + def call_verb(self, name, params): + """Apply a verb to the active evaluation object. + + Parameters + ----------- + name : :obj:`str` + Name of the verb. + params : :obj:`dict` + Parameters to be forwarded to the verb. 
+ + Returns + ------- + :obj:`xarray.DataArray` or + :obj:`MetaCollection ` + + """ + # Get the object to apply the verb to. + obj = self._get_eval_obj() + # Apply the verb. + verb = getattr(obj.sqm, name) + out = verb(**params) + # Warn when output array is empty. + try: + is_empty = out.sqm.is_empty + except AttributeError: + is_empty = out.is_empty + if is_empty: + warnings.warn( + f"Verb '{name}' returned an empty array" + ) + logger.debug(f"Applied verb {name}:\n{out}") + return out + + + def handle_concept(self, block): + """Handler for semantic concept references. + + Parameters + ---------- + block : :obj:`dict` + Textual representation of a building block of type "concept". + + Returns + ------- + :obj:`xarray.DataArray` + + """ + logger.debug(f"Translating concept {block['reference']}") + out = self._mapping.translate( + *block["reference"], + property = block["property"] if "property" in block else None, + extent = self._extent, + datacube = self._datacube, + eval_obj = self._get_eval_obj(), + processor = FilterProcessor, + preview = self._preview, + cache = self._cache, + custom_verbs = self._custom_verbs, + custom_operators = self._custom_operators, + custom_reducers = self._custom_reducers, + track_types = self._track_types, + watch_layer = self._watch_layer, + meta_timestamps = self._meta_timestamps, + ) + logger.debug(f"Translated concept {block['reference']}:\n{out}") + return out + + def handle_collection(self, block): + """Handler for collection references. + + Parameters + ---------- + block : :obj:`dict` + Textual representation of a building block of type "collection". + + Returns + ------- + :obj:`processor.arrays.MetaCollection` + + """ + logger.debug("Constructing collection of arrays") + list_obj = [self.call_handler(x) for x in block["elements"]] + out = arrays.MetaCollection(list_obj) + logger.debug(f"Constructed collection of:\n{[x.name for x in out]}") + return out + + def handle_label(self, block): + """Handler for value labels. 
+ + Parameters + ---------- + block : :obj:`dict` + Textual representation of a building block of type "label". + + Returns + ------- + :obj:None + """ + return None + + def handle_layer(self, block): + """Handler for data layer references. + + Parameters + ---------- + block : :obj:`dict` + Textual representation of a building block of type "layer". + + Returns + ------- + :obj:`xarray.DataArray` + """ + self._cache.build(block["reference"]) + # Create a data array with the same shape as temporal extent. + x_coords = [0, 1] + y_coords = [0, 1] + data = np.ones((len(self.meta_timestamps), len(y_coords), len(x_coords))) + data_array = xr.DataArray( + data, + coords = { + TIME: self.meta_timestamps, + X: y_coords, + Y: x_coords, + "spatial_feats": ((Y, X), np.ones((len(y_coords), len(x_coords)))) + }, + dims = [TIME, Y, X] + ) + # Set relevant data array properties. + layer_key = "_".join(block["reference"]) + data_array.name = layer_key + if layer_key == self._watch_layer: + data_array.sqm.active = True + else: + data_array.sqm.active = False + data_array.sqm.locked = False + return data_array + + def handle_result(self, block): + """Handler for result references. + + Parameters + ---------- + block : :obj:`dict` + Textual representation of a building block of type "result". + + Returns + ------- + :obj:`xarray.DataArray` or + :obj:`MetaCollection ` + + Raises + ------ + :obj:`exceptions.UnknownResultError` + If the referenced result is not present in the query recipe. + + """ + name = block["name"] + logger.debug(f"Fetching result '{name}'") + # Process referenced result if it is not processed yet. 
+ if name not in self._response[self.watch_layer]: + try: + instructions = self._recipe[name] + except KeyError: + raise exceptions.UnknownResultError( + f"Recipe does not contain result '{name}'" + ) + logger.info(f"Started executing result: '{name}'") + result = self.call_handler(instructions) + result.name = name + self._response[self.watch_layer][name] = result + logger.info(f"Finished executing result: '{name}'") + # Return referenced result. + out = self._response[self.watch_layer][name] + logger.debug(f"Fetched result '{name}':\n{out}") + return out + + def _contains_filter(self, params): + """Recursively check for temporal filter in nested dictionaries or lists. + + Parameters + ---------- + params : :obj:`dict` or :obj:`list` + The parameters to be checked. + + Returns + ------- + :obj:`bool` + True if temporal filter is found in the parameters, False otherwise. + """ + if isinstance(params, dict): + if params.get('type') == 'verb' and params.get('name') == 'filter': + if 'params' in params: + return self._contains_extract_time(params['params']) + for key, value in params.items(): + if isinstance(value, (dict, list)): + if self._contains_filter(value): + return True + elif isinstance(params, list): + for item in params: + if self._contains_filter(item): + return True + return False + + def _contains_extract_time(self, params): + """Recursively check for 'extract time' in nested dictionaries or lists. + + Parameters + ---------- + params : :obj:`dict` or :obj:`list` + The parameters to be checked. + + Returns + ------- + :obj:`bool` + True if 'extract time' is found in the parameters, False otherwise. 
+ """ + if isinstance(params, dict): + if params.get('type') == 'verb' and params.get('name') == 'extract': + if 'params' in params and params['params'].get('dimension') == 'time': + return True + for key, value in params.items(): + if isinstance(value, (dict, list)): + if self._contains_extract_time(value): + return True + elif isinstance(params, list): + for item in params: + if self._contains_extract_time(item): + return True + return False + + def _find_and_resolve_concepts(self, recipe): + """Searches recursively for and resolve 'concept' references in a recipe. + + Parameters + ---------- + recipe : :obj:`dict` or :obj:`list` + The recipe (or pieces of it) which is to be checked for references. + + Returns + ------- + :obj:`list` + List of concept defitions that are referenced in the recipe. + """ + results = [] + if isinstance(recipe, dict): + # Check if this dict contains a concept reference + if ( + recipe.get('type') == 'concept' and + isinstance(recipe.get('reference'), tuple) and + recipe['reference'][0] == 'entity' + ): + # Resolve reference + ref = recipe['reference'] + property = recipe.get('property') + ruleset = self._mapping.lookup(*ref) + if property is None: + results.append(ruleset) + else: + try: + results.append(ruleset[property]) + except KeyError: + raise KeyError(f"Property '{property}' is not defined for concept '{ref}'") + else: + # Recur into the dictionary to search for nested occurrences + for key, value in recipe.items(): + results.extend(self._find_and_resolve_concepts(value)) + elif isinstance(recipe, list): + # Recur into lists to search for nested dictionaries + for item in recipe: + results.extend(self._find_and_resolve_concepts(item)) + return results class Cache: """Cache of retrieved data layers. 
@@ -1387,4 +2027,4 @@ def _add_data(self, key, value): def _rm_data(self, key): """Remove data layer from cache.""" - del self._data[key] \ No newline at end of file + del self._data[key] diff --git a/semantique/processor/operators.py b/semantique/processor/operators.py index dbc8f7c2..d3ca2cff 100644 --- a/semantique/processor/operators.py +++ b/semantique/processor/operators.py @@ -7,6 +7,23 @@ from semantique.processor.values import Interval from semantique.dimensions import SPACE +def get_accessor(data, meta = False): + """Get the appropriate accessor for the data object. + + Parameters + ---------- + data : :obj:`xarray.DataArray` + The data object to get the accessor for. + meta : :obj:`bool` + Should the meta accessor be used? If False, the standard accessor is + used. The meta accessor is used to access MetaArray and MetaCollection + instead of Array and Collection objects. + """ + if meta: + return data.sqm + else: + return data.sq + # # UNIVARIATE OPERATORS # @@ -839,7 +856,7 @@ def to_radians_(x, track_types = True, **kwargs): # ALGEBRAIC OPERATORS # -def add_(x, y, track_types = True, **kwargs): +def add_(x, y, track_types = True, meta = False, **kwargs): """Add y to x. Parameters @@ -857,6 +874,8 @@ def add_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the MetaArray accessor be used? **kwargs: Ignored. @@ -887,13 +906,13 @@ def add_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "add") promoter.check() f = lambda x, y: np.add(x, y) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def divide_(x, y, track_types = True, **kwargs): +def divide_(x, y, track_types = True, meta = False, **kwargs): """Divide x by y. 
Parameters @@ -911,6 +930,8 @@ def divide_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. @@ -941,13 +962,13 @@ def divide_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "divide") promoter.check() f = lambda x, y: np.divide(x, np.where(np.equal(y, 0), np.nan, y)) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def multiply_(x, y, track_types = True, **kwargs): +def multiply_(x, y, track_types = True, meta = False, **kwargs): """Multiply x by y. Parameters @@ -965,6 +986,8 @@ def multiply_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. @@ -995,13 +1018,13 @@ def multiply_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "multiply") promoter.check() f = lambda x, y: np.multiply(x, y) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def power_(x, y, track_types = True, **kwargs): +def power_(x, y, track_types = True, meta = False, **kwargs): """Raise x to the yth power. Parameters @@ -1019,6 +1042,8 @@ def power_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. 
@@ -1049,13 +1074,13 @@ def power_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "power") promoter.check() f = lambda x, y: np.power(x, y) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def subtract_(x, y, track_types = True, **kwargs): +def subtract_(x, y, track_types = True, meta = False, **kwargs): """Subtract y from x. Parameters @@ -1073,6 +1098,8 @@ def subtract_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. @@ -1103,13 +1130,13 @@ def subtract_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "subtract") promoter.check() f = lambda x, y: np.subtract(x, y) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def normalized_difference_(x, y, track_types = True, **kwargs): +def normalized_difference_(x, y, track_types = True, meta = False, **kwargs): """Compute the normalized difference between x and y. The normalized difference is used to calculate common indices in remote @@ -1131,6 +1158,8 @@ def normalized_difference_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. 
@@ -1161,7 +1190,7 @@ def normalized_difference_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "normalized_difference") promoter.check() f = lambda x, y: np.divide(np.subtract(x, y), np.add(x, y)) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) @@ -1171,7 +1200,7 @@ def normalized_difference_(x, y, track_types = True, **kwargs): # BOOLEAN OPERATORS # -def and_(x, y, track_types = True, **kwargs): +def and_(x, y, track_types = True, meta = False, **kwargs): """Test if both x and y are true. Parameters @@ -1189,6 +1218,8 @@ def and_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. @@ -1221,13 +1252,13 @@ def and_(x, y, track_types = True, **kwargs): def f(x, y): y = utils.null_as_zero(y) return np.where(pd.notnull(x), np.logical_and(x, y), np.nan) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def or_(x, y, track_types = True, **kwargs): +def or_(x, y, track_types = True, meta = False, **kwargs): """Test if at least one of x and y are true. Parameters @@ -1245,6 +1276,8 @@ def or_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. 
@@ -1284,13 +1317,13 @@ def or_(x, y, track_types = True, **kwargs): def f(x, y): y = utils.null_as_zero(y) return np.where(pd.notnull(x), np.logical_or(x, y), np.nan) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def exclusive_or_(x, y, track_types = True, **kwargs): +def exclusive_or_(x, y, track_types = True, meta = False, **kwargs): """Test if either x or y is true but not both. Parameters @@ -1308,6 +1341,8 @@ def exclusive_or_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. @@ -1347,7 +1382,7 @@ def exclusive_or_(x, y, track_types = True, **kwargs): def f(x, y): y = utils.null_as_zero(y) return np.where(pd.notnull(x), np.logical_xor(x, y), np.nan) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) @@ -1357,7 +1392,7 @@ def f(x, y): # EQUALITY OPERATORS # -def equal_(x, y, track_types = True, **kwargs): +def equal_(x, y, track_types = True, meta = False, **kwargs): """Test if x is equal to y. Parameters @@ -1375,6 +1410,8 @@ def equal_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. 
@@ -1405,7 +1442,7 @@ def equal_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "equal") promoter.check() f = lambda x, y: np.where(pd.notnull(x), np.equal(x, y), np.nan) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) @@ -1466,7 +1503,7 @@ def f(x, y): out = promoter.promote(out) return out -def not_equal_(x, y, track_types = True, **kwargs): +def not_equal_(x, y, track_types = True, meta = False, **kwargs): """Test if x is not equal to y. Parameters @@ -1484,6 +1521,8 @@ def not_equal_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. @@ -1514,7 +1553,7 @@ def not_equal_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "not_equal") promoter.check() f = lambda x, y: np.where(pd.notnull(x), np.not_equal(x, y), np.nan) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) @@ -1579,7 +1618,7 @@ def f(x, y): # REGULAR RELATIONAL OPERATORS # -def greater_(x, y, track_types = True, **kwargs): +def greater_(x, y, track_types = True, meta = False, **kwargs): """Test if x is greater than y. Parameters @@ -1597,6 +1636,8 @@ def greater_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. 
@@ -1627,13 +1668,13 @@ def greater_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "greater") promoter.check() f = lambda x, y: np.where(pd.notnull(x), np.greater(x, y), np.nan) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def greater_equal_(x, y, track_types = True, **kwargs): +def greater_equal_(x, y, track_types = True, meta = False, **kwargs): """Test if x is greater than or equal to y. Parameters @@ -1651,6 +1692,8 @@ def greater_equal_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. @@ -1681,13 +1724,13 @@ def greater_equal_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "greater_equal") promoter.check() f = lambda x, y: np.where(pd.notnull(x), np.greater_equal(x, y), np.nan) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def less_(x, y, track_types = True, **kwargs): +def less_(x, y, track_types = True, meta = False, **kwargs): """Test if x is less than y. Parameters @@ -1705,6 +1748,8 @@ def less_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. 
@@ -1735,13 +1780,13 @@ def less_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "less") promoter.check() f = lambda x, y: np.where(pd.notnull(x), np.less(x, y), np.nan) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def less_equal_(x, y, track_types = True, **kwargs): +def less_equal_(x, y, track_types = True, meta = False, **kwargs): """Test if x is less than or equal to y. Parameters @@ -1759,6 +1804,8 @@ def less_equal_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. @@ -1789,7 +1836,7 @@ def less_equal_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "less_equal") promoter.check() f = lambda x, y: np.where(pd.notnull(x), np.less_equal(x, y), np.nan) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) @@ -1799,7 +1846,7 @@ def less_equal_(x, y, track_types = True, **kwargs): # SPATIAL RELATIONAL OPERATORS # -def intersects_(x, y, track_types = True, **kwargs): +def intersects_(x, y, track_types = True, meta = False, **kwargs): """Test if x spatially intersects with y. This is a specific spatial relational operator meant to be evaluated with @@ -1822,6 +1869,8 @@ def intersects_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. 
@@ -1858,10 +1907,10 @@ def intersects_(x, y, track_types = True, **kwargs): try: y = y.unary_union except AttributeError: - y = y.sq.trim().sq.grid_points.envelope.unary_union - values = x.sq.grid_points.intersects(y).astype(int) - coords = x.sq.stack_spatial_dims()[SPACE].coords - out = xr.DataArray(values, coords = coords).sq.unstack_spatial_dims() + y = get_accessor(get_accessor(y, meta).trim(), meta).grid_points.envelope.unary_union + values = get_accessor(x, meta).grid_points.intersects(y).astype(int) + coords = get_accessor(x, meta).stack_spatial_dims()[SPACE].coords + out = get_accessor(xr.DataArray(values, coords = coords), meta).unstack_spatial_dims() if track_types: out = promoter.promote(out) return out @@ -2065,7 +2114,7 @@ def f(x, y): # Note: These are used by the assign verb. -def assign_(x, y, track_types = True, **kwargs): +def assign_(x, y, track_types = True, meta = False, **kwargs): """Replace x by y. Parameters @@ -2083,6 +2132,8 @@ def assign_(x, y, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. @@ -2113,13 +2164,13 @@ def assign_(x, y, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "assign") promoter.check() f = lambda x, y: np.where(pd.notnull(x), y, utils.get_null(y)) - y = xr.DataArray(y).sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) out = xr.apply_ufunc(f, x, y, keep_attrs = True) if track_types: out = promoter.promote(out) return out -def assign_at_(x, y, z, track_types = True, **kwargs): +def assign_at_(x, y, z, track_types = True, meta = False, **kwargs): """Replace x by y where z is true. 
Parameters @@ -2141,6 +2192,8 @@ def assign_at_(x, y, z, track_types = True, **kwargs): track_types : :obj:`bool` Should the operator promote the value type of the output object, based on the value type of the input objects? + meta : :obj:`bool` + Should the meta accessor be used? **kwargs: Ignored. @@ -2171,8 +2224,8 @@ def assign_at_(x, y, z, track_types = True, **kwargs): promoter = TypePromoter(x, y, function = "assign_at") promoter.check() f = lambda x, y, z: np.where(np.logical_and(pd.notnull(z), z), y, x) - y = xr.DataArray(y).sq.align_with(x) - z = z.sq.align_with(x) + y = get_accessor(xr.DataArray(y), meta).align_with(x) + z = get_accessor(z, meta).align_with(x) out = xr.apply_ufunc(f, x, y, z, keep_attrs = True) if track_types: out = promoter.promote(out) diff --git a/semantique/recipe.py b/semantique/recipe.py index 34d1d12a..69c6da5f 100644 --- a/semantique/recipe.py +++ b/semantique/recipe.py @@ -1,4 +1,4 @@ -from semantique.processor.core import QueryProcessor, FakeProcessor +from semantique.processor.core import QueryProcessor, FakeProcessor, FilterProcessor from semantique.visualiser.visualise import show class QueryRecipe(dict): @@ -28,8 +28,17 @@ def __init__(self, results = None): obj = {} if results is None else results super(QueryRecipe, self).__init__(obj) - def execute(self, datacube, mapping, space, time, run_preview = False, - cache_data = True, **config): + def execute( + self, + datacube, + mapping, + space, + time, + filter_check = True, + run_preview = False, + cache_data = True, + **config + ): """Execute a query recipe. This function initializes a :obj:`processor.core.QueryProcessor` instance @@ -46,6 +55,11 @@ def execute(self, datacube, mapping, space, time, run_preview = False, The spatial extent in which the query should be processed. time : TemporalExtent The temporal extent in which the query should be processed. + filter_check : :obj:`bool` + Should the query processor evaluate possible temporal filter operations + upfront? 
This can reduce the amount of data to be processed in the + subsequent evaluation process. If the recipe doesn't contain any + temporal filter operations, this flag has no effect. run_preview : :obj:`bool` Should a preview run with reduced spatial resolution be performed? A preview run enables to test if the recipe execution succeeds @@ -83,12 +97,25 @@ def execute(self, datacube, mapping, space, time, run_preview = False, >>> recipe.execute(dc, mapping, space, time, **config) """ - if cache_data: - fp = FakeProcessor.parse(self, datacube, mapping, space, time, **config) - _ = fp.optimize().execute() - cache = fp.cache + if filter_check: + # Use FilterProcessor to retrieve required minimum set of data IDs + fip = FilterProcessor.parse(self, datacube, mapping, space, time, **config) + _ = fip.optimize().execute() + # Update datacube according to FilterProcessor + datacube = fip.datacube + # Retrieve cache from fake processor instance as part of the filter processor + if cache_data: + cache = fip.fap.cache + else: + cache = None else: - cache = None + # Retrieve cache from standalone fake processor instance + if cache_data: + fap = FakeProcessor.parse(self, datacube, mapping, space, time, **config) + _ = fap.optimize().execute() + cache = fap.cache + else: + cache = None qp = QueryProcessor.parse( self, @@ -110,4 +137,4 @@ def visualise(self): editor. The recipe is converted into Blockly XML format and served to the browser. """ - show(self) + show(self) \ No newline at end of file