class NoHessianMethod(Exception):
    """Raised when the forward model cannot provide an estimate of the Hessian.

    Parameters
    ----------
    message: str
        Human-readable explanation of why no Hessian is available.
    """

    def __init__(self, message):
        # Chain through Exception.__init__ so str(exc) and exc.args carry the
        # message; the original only stored self.message, leaving str(exc)
        # empty. (Py2-compatible explicit super call, as the package still
        # uses Python-2 style imports elsewhere.)
        super(NoHessianMethod, self).__init__(message)
        self.message = message
#!/usr/bin/env python
"""Sentinel-1 backscatter observation reader.

Wraps a folder of Sentinel-1 NetCDF files and serves per-date backscatter,
uncertainty and mask information reprojected onto a common state grid.
"""
import datetime
import glob
import os
from collections import namedtuple

import numpy as np

import scipy.sparse as sp

try:
    # Modern GDAL releases only ship the Python bindings inside the osgeo
    # namespace package; plain ``import gdal`` is the legacy spelling.
    from osgeo import gdal, osr
except ImportError:
    try:
        import gdal
        import osr
    except ImportError:
        # Allow importing this module without GDAL (e.g. to unit test the
        # pure-numpy helpers); reproject_image fails loudly if needed.
        gdal = None
        osr = None

WRONG_VALUE = -999.0  # TODO tentative missing value

SARdata = namedtuple('SARdata',
                     'observations uncertainty mask metadata emulator')


def reproject_image(source_img, target_img, dstSRSs=None):
    """Reproject/warp ``source_img`` so it exactly matches ``target_img``.

    Parameters
    ----------
    source_img: str
        GDAL-readable dataset name to be warped.
    target_img: str
        GDAL-readable dataset whose grid (extent, resolution and, unless
        ``dstSRSs`` is given, projection) defines the output.
    dstSRSs: osr.SpatialReference, optional
        Destination SRS; defaults to the projection of ``target_img``.

    Returns
    -------
    An in-memory GDAL dataset.

    Raises
    ------
    RuntimeError if GDAL is unavailable; ValueError if the warp fails.
    """
    if gdal is None:
        raise RuntimeError("GDAL is required for reproject_image!")
    g = gdal.Open(target_img)
    geo_t = g.GetGeoTransform()
    x_size, y_size = g.RasterXSize, g.RasterYSize
    # min/max guard against north-up vs south-up geotransforms.
    xmin = min(geo_t[0], geo_t[0] + x_size * geo_t[1])
    xmax = max(geo_t[0], geo_t[0] + x_size * geo_t[1])
    ymin = min(geo_t[3], geo_t[3] + y_size * geo_t[5])
    ymax = max(geo_t[3], geo_t[3] + y_size * geo_t[5])
    xRes, yRes = abs(geo_t[1]), abs(geo_t[5])
    if dstSRSs is None:
        dstSRS = osr.SpatialReference()
        raster_wkt = g.GetProjection()
        dstSRS.ImportFromWkt(raster_wkt)
    else:
        dstSRS = dstSRSs
    g = gdal.Warp('', source_img, format='MEM',
                  outputBounds=[xmin, ymin, xmax, ymax],
                  xRes=xRes, yRes=yRes, dstSRS=dstSRS)
    if g is None:
        raise ValueError("Something failed with GDAL!")
    return g


class S1Observations(object):
    """Sentinel-1 observations stored as one NetCDF file per acquisition."""

    def __init__(self, data_folder, state_mask, emulators=None):
        """Index the ``*.nc`` files in ``data_folder`` by acquisition date.

        Parameters
        ----------
        data_folder: str
            Folder containing the Sentinel-1 NetCDF files.
        state_mask: str
            GDAL-readable dataset defining the state grid.
        emulators: dict, optional
            Mapping polarisation -> emulator/forward model. Defaults to the
            original placeholder values.
        """
        if emulators is None:
            # None-sentinel avoids the shared mutable-default-argument
            # pitfall of the original signature.
            emulators = {'VV': 'SOmething', 'VH': 'Other'}
        # 1. Find the files and index them by acquisition datetime.
        files = glob.glob(os.path.join(data_folder, '*.nc'))
        files.sort()
        self.state_mask = state_mask
        self.dates = []
        self.date_data = {}
        for fich in files:
            fname = os.path.basename(fich)
            splitter = fname.split('_')
            # Assumes the S1 naming convention puts the start time in
            # underscore-field 5 -- TODO confirm for the products used.
            this_date = datetime.datetime.strptime(splitter[5],
                                                   '%Y%m%dT%H%M%S')
            self.dates.append(this_date)
            self.date_data[this_date] = fich
        # 2. Store the emulator(s)
        self.emulators = emulators

    def _read_backscatter(self, obs_ptr):
        """Read the backscatter band from an open GDAL dataset.

        Returns the 2D array of backscatter values aligned with the grid.
        """
        backscatter = obs_ptr.ReadAsArray()
        return backscatter

    def _calculate_uncertainty(self, backscatter):
        """First-guess uncertainty of the Sentinel-1 backscatter.

        Currently a flat 5% relative uncertainty. NOTE(review): the original
        comment claimed "1 dB", which does not match the 0.05 factor --
        confirm which is intended. A proper treatment should estimate the
        Equivalent Number of Looks, e.g.
        ``ENL = backscatter.mean()**2 / backscatter.std()**2``.
        """
        unc = backscatter * 0.05
        return unc

    def _get_mask(self, backscatter):
        """Return a boolean array, True for valid (non-missing) pixels."""
        # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
        mask = np.ones_like(backscatter, dtype=bool)
        mask[backscatter == WRONG_VALUE] = False
        return mask

    def get_band_data(self, timestep, band):
        """Gather all relevant S1 data for one timestep and band.

        Parameters
        ----------
        timestep: datetime.datetime
            Key into ``self.date_data``.
        band: int
            0 for VV, 1 for VH.

        Returns
        -------
        SARdata namedtuple (observations, uncertainty, mask, metadata,
        emulator).
        """
        if band == 0:
            polarisation = 'VV'
        elif band == 1:
            polarisation = 'VH'
        else:
            # The original fell through with ``polarisation`` undefined.
            raise ValueError("Only bands 0 (VV) and 1 (VH) are supported!")
        this_file = self.date_data[timestep]
        fname = 'NETCDF:"{:s}":sigma0_{:s}'.format(this_file, polarisation)
        obs_ptr = reproject_image(fname, self.state_mask)
        observations = self._read_backscatter(obs_ptr)
        uncertainty = self._calculate_uncertainty(observations)
        mask = self._get_mask(observations)
        # Copy so zeroing masked pixels does not mutate ``uncertainty``
        # (the original aliased it via ``R_mat = uncertainty``).
        R_mat = uncertainty.copy()
        R_mat[np.logical_not(mask)] = 0.
        # NOTE(review): masked pixels have R_mat == 0, so the inverse-variance
        # diagonal below becomes inf -- presumably the filter ignores masked
        # rows downstream; confirm.
        N = mask.ravel().shape[0]
        R_mat_sp = sp.lil_matrix((N, N))
        R_mat_sp.setdiag(1. / (R_mat.ravel()) ** 2)
        R_mat_sp = R_mat_sp.tocsr()

        emulator = self.emulators[polarisation]
        # Incidence angle and orbit metadata from the "theta" band.
        fname = 'NETCDF:"{:s}":{:s}'.format(this_file, "theta")
        obs_ptr = reproject_image(fname, self.state_mask)
        meta = obs_ptr.GetMetadata()
        metadata = {'incidence_angle': obs_ptr.ReadAsArray(),
                    'relativeorbit': meta['NC_GLOBAL#relativeorbit'],
                    'orbitdirection': meta['NC_GLOBAL#orbitdirection'],
                    'satellite': meta['NC_GLOBAL#satellite']}
        sardata = SARdata(observations, R_mat_sp, mask, metadata, emulator)
        return sardata


if __name__ == "__main__":
    data_folder = "/media/ucfajlg/WERKLY/jose/new/"
    # The original called S1Observations(data_folder) without the mandatory
    # state_mask argument, which raised a TypeError.
    sentinel1 = S1Observations(data_folder, None)
a/kafka/input_output/__init__.py +++ b/kafka/input_output/__init__.py @@ -1,3 +1,4 @@ -__all__ = ["observations"] +__all__ = ["observations", "Sentinel1_Observations"] from .observations import * +from .Sentinel1_Observations import * diff --git a/kafka/input_output/observations.py b/kafka/input_output/observations.py index 9ad6fd4..34adc1b 100644 --- a/kafka/input_output/observations.py +++ b/kafka/input_output/observations.py @@ -49,7 +49,7 @@ import numpy as np -from BRDF_descriptors import RetrieveBRDFDescriptors +# from BRDF_descriptors import RetrieveBRDFDescriptors #from kernels import Kernels @@ -209,92 +209,138 @@ def get_band_data(self, the_date, band_no): BHR = np.sum(BHR * to_NIR, axis=0) + a_to_NIR -class BHRObservations(RetrieveBRDFDescriptors): - def __init__(self, emulator, tile, mcd43a1_dir, - start_time, ulx=0, uly=0, dx=2400, dy=2400, end_time=None, - mcd43a2_dir=None): - """The class needs to locate the data granules. We assume that - these are available somewhere in the filesystem and that we can - index them by location (MODIS tile name e.g. "h19v10") and - time. The user can give a folder for the MCD43A1 and A2 granules, - and if the second is ignored, it will be assumed that they are - in the same folder. We also need a starting date (either a - datetime object, or a string in "%Y-%m-%d" or "%Y%j" format. 
If - the end time is not specified, it will be set to the date of the - latest granule found.""" - - # Call the constructor first - # Python2 - RetrieveBRDFDescriptors.__init__(self, tile, - mcd43a1_dir, start_time, end_time, - mcd43a2_dir) - # Python3 - # super().__init__(tile, mcd43a1_dir, start_time, end_time, - # mcd43a2_dir) - self._get_emulator(emulator) - self.dates = sorted(self.a1_granules.keys()) - self.dates = self.dates[::8] - a1_temp = {} - a2_temp = {} - for k in self.dates: - a1_temp[k] = self.a1_granules[k] - a2_temp[k] = self.a2_granules[k] - self.a1_granules = a1_temp - self.a2_granules = a2_temp - self.band_transfer = {0: "vis", - 1: "nir"} - self.ulx = ulx - self.uly = uly - self.dx = dx - self.dy = dy - - def define_output(self): - reference_fname = self.a1_granules[self.dates[0]] - g = gdal.Open('HDF4_EOS:EOS_GRID:' + - '"%s":MOD_Grid_BRDF:BRDF_Albedo_Parameters_vis' % - reference_fname) - proj = g.GetProjection() - geoT = np.array(g.GetGeoTransform()) - new_geoT = geoT*1. - new_geoT[0] = new_geoT[0] + self.ulx*new_geoT[1] - new_geoT[3] = new_geoT[3] + self.uly*new_geoT[5] - return proj, new_geoT.tolist() +# class BHRObservations(RetrieveBRDFDescriptors): +# def __init__(self, emulator, tile, mcd43a1_dir, +# start_time, ulx=0, uly=0, dx=2400, dy=2400, end_time=None, +# mcd43a2_dir=None): +# """The class needs to locate the data granules. We assume that +# these are available somewhere in the filesystem and that we can +# index them by location (MODIS tile name e.g. "h19v10") and +# time. The user can give a folder for the MCD43A1 and A2 granules, +# and if the second is ignored, it will be assumed that they are +# in the same folder. We also need a starting date (either a +# datetime object, or a string in "%Y-%m-%d" or "%Y%j" format. 
If +# the end time is not specified, it will be set to the date of the +# latest granule found.""" + +# # Call the constructor first +# # Python2 +# RetrieveBRDFDescriptors.__init__(self, tile, +# mcd43a1_dir, start_time, end_time, +# mcd43a2_dir) +# # Python3 +# # super().__init__(tile, mcd43a1_dir, start_time, end_time, +# # mcd43a2_dir) +# self._get_emulator(emulator) +# self.dates = sorted(self.a1_granules.keys()) +# self.dates = self.dates[::8] +# a1_temp = {} +# a2_temp = {} +# for k in self.dates: +# a1_temp[k] = self.a1_granules[k] +# a2_temp[k] = self.a2_granules[k] +# self.a1_granules = a1_temp +# self.a2_granules = a2_temp +# self.band_transfer = {0: "vis", +# 1: "nir"} +# self.ulx = ulx +# self.uly = uly +# self.dx = dx +# self.dy = dy + +# def define_output(self): +# reference_fname = self.a1_granules[self.dates[0]] +# g = gdal.Open('HDF4_EOS:EOS_GRID:' + +# '"%s":MOD_Grid_BRDF:BRDF_Albedo_Parameters_vis' % +# reference_fname) +# proj = g.GetProjection() +# geoT = np.array(g.GetGeoTransform()) +# new_geoT = geoT*1. +# new_geoT[0] = new_geoT[0] + self.ulx*new_geoT[1] +# new_geoT[3] = new_geoT[3] + self.uly*new_geoT[5] +# return proj, new_geoT.tolist() + +# def _get_emulator(self, emulator): +# if not os.path.exists(emulator): +# raise IOError("The emulator {} doesn't exist!".format(emulator)) +# # Assuming emulator is in an pickle file... 
class BHRObservationsTest(object):
    """Serve BHR (albedo) data for testing, "one pixel at a time".

    Only a ``dates`` iterable (datetime keys) plus matching VIS/NIR albedo
    sequences are needed; ``get_band_data`` then mimics the real observation
    classes.

    Parameters
    ----------
    emulator: str
        Path to a pickle file with the emulator.
    dates: sequence of datetime
        Observation dates.
    vis_albedo, nir_albedo: sequence of float
        One VIS and one NIR albedo value per date.
    """
    def __init__(self, emulator, dates, vis_albedo, nir_albedo):
        # Every date must have exactly one VIS and one NIR value.
        assert (len(dates) == len(vis_albedo))
        assert (len(dates) == len(nir_albedo))
        self.emulator = self._get_emulator(emulator)
        self.dates = dates
        self.data = {}
        for ii, the_date in enumerate(dates):
            self.data[the_date] = [vis_albedo[ii], nir_albedo[ii]]

    def _get_emulator(self, emulator):
        """Load and return the emulator pickle; IOError if the file is missing."""
        if not os.path.exists(emulator):
            raise IOError("The emulator {} doesn't exist!".format(emulator))
        # Context manager closes the handle promptly -- the original
        # ``cPickle.load(open(...))`` left the file open until GC.
        with open(emulator, 'rb') as fp:
            return cPickle.load(fp)

    def get_band_data(self, the_date, band_no):
        """Return a BHR_data tuple for ``the_date`` / ``band_no`` (0=VIS, 1=NIR)."""
        bhr = self.data[the_date][band_no]
        # np.bool was removed in NumPy 1.24; the builtin bool is equivalent.
        mask = np.ones((1, 1), dtype=bool)
        # Inverse variance, flooring sigma (5% relative) at 2.5e-3.
        R_mat = 1. / (np.maximum(2.5e-3, bhr * 0.05)) ** 2
        R_mat_sp = sp.lil_matrix((1, 1))
        R_mat_sp.setdiag(R_mat)
        R_mat_sp = R_mat_sp.tocsr()

        bhr_data = BHR_data(bhr, mask, R_mat_sp, None, self.emulator)
        return bhr_data


class KafkaOutputBHRTest(object):
    """A very simple class to accumulate the state in memory."""
    def __init__(self):
        # Maps timestep -> {parameter name: values} dictionaries.
        self.output = {}

    def dump_data(self, timestep, x_analysis, P_analysis, P_analysis_inv,
                  state_mask):
        """Store the 7 interleaved TIP parameters from ``x_analysis``.

        ``P_analysis``, ``P_analysis_inv`` and ``state_mask`` are accepted
        only for interface compatibility and are ignored.
        """
        solution = {}
        for ii, param in enumerate(["w_vis", "x_vis", "a_vis",
                                    "w_nir", "x_nir", "a_nir", "TeLAI"]):
            # State vector is ordered parameter-major per pixel, stride 7.
            solution[param] = x_analysis[ii::7]
        self.output[timestep] = solution
#!/usr/bin/env python
"""Water Cloud Model (WCM) SAR forward model and its observation operator."""
import logging

import numpy as np

import scipy.sparse as sp

LOG = logging.getLogger(__name__)


def sar_observation_operator(x, theta, polarisation):
    """Simple Water Cloud Model (WCM) SAR forward operator.

    The WCM is given by::

        tau = exp(-2*B*V/cos(theta))
        sigma_veg = A*V**E*cos(theta)*(1-tau)
        sigma_soil = 10**((C+D*SM)/10)
        sigma_0 = sigma_veg + tau*sigma_soil

    where A, B, C, D, E are fitted parameters, V is a vegetation descriptor
    (e.g. LAI), SM is soil moisture [m^3/m^3], tau is the two-way canopy
    attenuation and sigma_0 the total (linear, not dB) backscatter.

    Parameters
    ----------
    x: array
        2D array, one row of [V, SM] parameters per pixel.
    theta: scalar or array
        Incidence angle(s) in degrees; a scalar is broadcast to all rows
        (generalised from the original, which required a per-row array).
    polarisation: str
        "VV" or "VH".

    Returns
    -------
    sigma_0: array
        Predicted backscatter per row of ``x`` (linear units, not dB).
    grad: array
        d(sigma_0)/d(params), same shape as ``x``.

    Raises
    ------
    ValueError for unknown polarisations.
    """
    x = np.atleast_2d(x)

    # Degrees -> radians; broadcast a scalar angle to one value per pixel.
    theta = np.atleast_1d(np.deg2rad(np.asarray(theta, dtype=float)))
    mu = np.broadcast_to(np.cos(theta), (x.shape[0],))

    # Fitted model parameters (A, B, C, D, E) per polarisation.
    parameters = {'VV': [0.0846, 0.0615, -14.8465, 15.907, 1.],
                  'VH': [0.0795, 0.1464, -14.8332, 15.907, 0.]}
    try:
        A, B, C, D, E = parameters[polarisation]
    except KeyError:
        raise ValueError('Only VV and VH polarisations available!')

    # Forward model (vectorised over pixels).
    tau = np.exp(-2. * B / mu * x[:, 0])
    sigma_veg = A * (x[:, 0] ** E) * mu * (1. - tau)
    sigma_surf = 10. ** ((C + D * x[:, 1]) / 10.)
    sigma_0 = sigma_veg + tau * sigma_surf

    # Gradient, same shape as x. Explicit float array: the original ``x*0``
    # silently truncated gradients for integer-typed inputs.
    grad = np.zeros_like(x, dtype=float)
    for i in range(x.shape[0]):
        tau_i = np.exp(-2. * B / mu[i] * x[i, 0])
        surf_i = 10. ** ((C + D * x[i, 1]) / 10.)
        # d(sigma_0)/dV: canopy term + attenuation terms. The last term is
        # algebraically identical to the original 2**(..)*5**(..) spelling
        # (2**(a+1) * 5**a == 2 * 10**a).
        grad[i, 0] = (A * E * mu[i] * (x[i, 0] ** (E - 1.)) * (1. - tau_i)
                      + 2. * A * B * (x[i, 0] ** E) * tau_i
                      - 2. * B * tau_i * surf_i / mu[i])
        # d(sigma_0)/dSM = tau * d(sigma_soil)/dSM.
        grad[i, 1] = D * np.log(10.) * tau_i * 10. ** (
            1. / 10. * (C + D * x[i, 1]) - 1.)

    # Returned values are linear scaled, not dB!
    return sigma_0, grad


def create_sar_observation_operator(n_params, forward_model, metadata,
                                    mask, state_mask, x_forecast, band):
    """Create the SAR observation operator from the WCM forward model above.

    Parameters
    ----------
    n_params: int
        Number of parameters in the state vector per pixel.
    forward_model: function
        The forward model to call (``sar_observation_operator``).
    metadata: list
        Not used yet (incidence angle is currently hard-coded).
    mask: array
        2D mask with the observational valid pixels.
    state_mask: array
        2D mask with the pixels used for state inference.
    x_forecast: array
        Linearisation point, ordered parameter-major per pixel
        (e.g. LAI1, SM1, LAI2, SM2, ..., LAIN, SMN).
    band: int
        0 is VV, 1 is VH.

    Returns
    -------
    (H0, dH): predicted observations and sparse CSR Jacobian.
    """
    LOG.info("Creating the ObsOp for band %d", band)
    # Integer division: ``/`` yields a float under Python 3 and both
    # np.zeros and lil_matrix reject non-integer shapes.
    n_times = x_forecast.shape[0] // n_params
    H_matrix = sp.lil_matrix((n_times, n_params * n_times),
                             dtype=np.float32)
    H0 = np.zeros(n_times, dtype=np.float32)

    # The model has spectral (polarisation) components.
    if band == 0:
        polarisation = "VV"
    elif band == 1:
        polarisation = "VH"
    else:
        # The original fell through with ``polarisation`` undefined.
        raise ValueError("Only bands 0 (VV) and 1 (VH) are supported!")

    flat_mask = mask[state_mask].flatten()
    x0 = np.zeros((n_times, n_params))
    theta = np.zeros(n_times)
    for i, valid in enumerate(flat_mask):
        if valid:
            x0[i, :] = x_forecast[(n_params * i):(n_params * (i + 1))]
            theta[i] = 23.  # TODO read incidence angle from metadata
    LOG.info("Running SAR forward model")
    # Pass only the angles of the valid pixels so theta lines up row-by-row
    # with the filtered x0 (the original passed the full-length theta,
    # which broke broadcasting whenever any pixel was masked out).
    H0_, dH = forward_model(x0[flat_mask], theta[flat_mask], polarisation)

    LOG.info("Storing emulators in H matrix")
    n = 0
    for i, valid in enumerate(flat_mask):
        if valid:
            H_matrix[i, (n_params * i):(n_params * (i + 1))] = dH[n]
            H0[i] = H0_[n]
            n += 1
    LOG.info("\tDone!")
    return (H0, H_matrix.tocsr())