Source code for lidarSuit.filters

"""Module for keep all filtering functionalities

"""

import pandas as pd
import numpy as np


[docs]class filtering: """SNR and Status filter It uses the carrier to noise ratio (SNR) and status variables available in the WindCube's data. It is similar to the filter described in the manual Parameters ---------- data : xrarray.Dataset Dataset containing the original WindCube's data """
[docs] def __init__(self, data): self.data = data
def getVerticalObsComp(self, variable, snr=False, status=True): """Vertical data filter It uses the SNR and status variables to filter out the data from vertical observations Parameters ---------- variable : str name of the variable that will be filtered snr : bool, int, optional if an interger is given it is used to as threshold to filter the data based on the signal to noise ratio status : bool, optional if true it filters the data using the status variable generated by the WindCube's software Returns ------- tmpData : xarray.DataSet an instance of the variable filtered using SNR or status variable """ tmpData = self.data[variable] if status: tmpData = tmpData.where(self.data.radial_wind_speed_status90 == 1) if snr != False: tmpData = tmpData.where(self.data.cnr90 > snr) tmpData = tmpData.where(self.data.elevation == 90, drop=True) return tmpData def getRadialObsComp(self, variable, azm, snr=False, status=True): """Slanted data filter It uses the SNR and status variables to filter out the data from slanted observations Parameters ---------- variable : str name of the variable that will be filtered snr : bool, int, optional if an interger is given it is used to as threshold to filter the data based on the signal to noise ratio status : bool, optional if true it filters the data using the status variable generated by the WindCube's software Returns ------- tmpaData : xarray.DataSet an instance of the variable filtered using SNR or status variable """ tmpData = self.data[variable] if status: tmpData = tmpData.where(self.data.radial_wind_speed_status == 1) if snr != False: tmpData = tmpData.where(self.data.cnr > snr) tmpData = tmpData.where( (self.data.elevation != 90) & (self.data.azimuth == azm), drop=True ) return tmpData
# it removes the STE below cloud layer
[docs]class secondTripEchoFilter: """Boundary layer second trip echoes filter This filter minimises the presence of second trip echoes (STE). This filter is based on the standard deviation of the anomaly of the observaions. It is applicable in regions where there is a contrast between the real data and the STE. Parameters ---------- data : object the object returned from the getRestructuredData timeCloudMaks : xarray.DataArray it is a time series for indicating the presence of clouds above the maximum WinCube range. 1 indicates cloud and 0 indicates no cloud. (THIS MAKS IS NOT NEEDED NOW) nProf : int number of profiles used to calculating the anomaly center : bool, optional it defines how the mean value for the anomaly will be calculated min_periods : int minimum number of profiles used for calculating the mean value nStd : int Multiplication factor for defining the size of the window to keep the data. The filter removes any anomaly larger than nStd * std strH : str starting hour for calculating the anomaly endH : str end hour for calculating the anomaly Returns ------- object : object an object containing data filtered for STE """
[docs] def __init__( self, data, timeCloudMask, nProf=500, center=True, min_periods=30, nStd=2, strH="09", endH="16", ): self.lidar = data # self.timeCloudMask = timeCloudMask self.nProf = nProf self.center = center self.min_periods = min_periods self.nStd = nStd self.getTimeEdges() self.calMeanAndAnomSlant() self.calMeanAndAnom90() self.cleaning() self.cleaning90()
def getTimeEdges(self, strH="09", endH="16"): """ It creates the time boundaries for the STD anomaly calculation Parameters ---------- strH : str starting hour for calculating the anomaly endH : str end hour for calculating the anomaly """ selTime = pd.to_datetime(self.lidar.dataTransf.time.values[0]) selTime = selTime.strftime("%Y%m%d") self.startTime = pd.to_datetime("{0} {1}".format(selTime, strH)) self.endTime = pd.to_datetime("{0} {1}".format(selTime, endH)) def calMeanAndAnomSlant(self): """ It calculates the anomaly from the slanted observations """ # slanted beam tmpSelData = self.lidar.dataTransf self.dataMean = tmpSelData.rolling( time=self.nProf, center=self.center, min_periods=self.min_periods ).mean() self.dataAnom = self.lidar.dataTransf - self.dataMean def calMeanAndAnom90(self): """ It calculates the anomaly from the vertical observations """ # vertical beam tmpSelData90 = self.lidar.dataTransf90 self.dataMean90 = tmpSelData90.rolling( time=self.nProf, center=self.center, min_periods=self.min_periods ).mean() self.dataAnom90 = self.lidar.dataTransf90 - self.dataMean90 def cleaning(self): """ It removes the data that is larger than the nStd * anomaly from the slanted observations """ tmpAnom = self.dataAnom.where( (self.lidar.dataTransf.time > self.startTime) & (self.lidar.dataTransf.time < self.endTime) ) anomStd = tmpAnom.std(dim=["time", "range", "elv"]) # Cross check if this commented part is still needed for # for filter the slanted profiles # tmpNoCloud = self.lidar.dataTransf.where(self.timeCloudMask == 0).copy() # tmpCloud = self.lidar.dataTransf.where(self.timeCloudMask == 1).copy() # tmpCloud = tmpCloud.where(np.abs(self.dataAnom) < self.nStd * anomStd) # tmpCleanData = tmpNoCloud.copy() # tmpCleanData.values[np.isfinite(tmpCloud)] = tmpCloud.values[np.isfinite(tmpCloud)] tmpCleanData = self.lidar.dataTransf.copy() tmpCleanData = tmpCleanData.where( np.abs(self.dataAnom) < self.nStd * anomStd ) self.lidar.dataTransf.values = tmpCleanData.values def cleaning90(self): """ It removes the data that is larger than the nStd * anomaly from the vertical observations """ tmpAnom = self.dataAnom90.where( (self.lidar.dataTransf90.time > self.startTime) & (self.lidar.dataTransf90.time < self.endTime) ) anomStd = tmpAnom.std(dim=["time", "range90"]) # tmpNoCloud = self.lidar.dataTransf90.where(self.timeCloudMask == 0).copy() # tmpCloud = self.lidar.dataTransf90.where(self.timeCloudMask == 1).copy() # tmpCloud = tmpCloud.where(np.abs(self.dataAnom90) < self.nStd * anomStd) # tmpCleanData = tmpNoCloud.copy() # tmpCleanData.values[np.isfinite(tmpCloud)] = tmpCloud.values[np.isfinite(tmpCloud)] tmpCleanData = self.lidar.dataTransf90.copy() tmpCleanData = tmpCleanData.where( np.abs(self.dataAnom90) < self.nStd * anomStd ) self.lidar.dataTransf90.values = tmpCleanData.values
# it removes STE and clouds contamination # from above the aerosol loaded region
[docs]class windCubeCloudRemoval: """Above boundary layer second trip echoes filter This filter reduces the second trip echoes contamination and clouds from regions above the boundary layer. It requires information from the ceilometer. IT IS STILL EXPERIMENTAL Parameters ---------- ceilo : xarray.Dataset A dataset of the CHM15k Nimbus ceilometer observations. lidar : xarray.Dataset An instance of the re-structured WindCube dataset Returns ------- object : object an object containing an instance of the noise height interface and the re-structured dataset filtered for STE and clouds. """
[docs] def __init__(self, ceilo, lidar=None): self.lidar = lidar self.ceilo = ceilo self.getNoiseFreeBeta() self.getHeightInterface() if lidar != None: self.getInterpInterfHeight() self.removeCloud()
def getNoiseFreeBeta(self): """ It removes the noise from the backscattered signal """ positiveBeta = self.ceilo.beta_raw.where(self.ceilo.beta_raw > 0) positiveBeta = positiveBeta.rolling( range=10, center=True, min_periods=10 ).mean() positiveBeta = positiveBeta.rolling( time=15, center=True, min_periods=15 ).mean() self.noiseFreeBeta = positiveBeta return self def getHeightInterface(self): """ It identifies the height of the separation between the noise and the non-noise data from the ceilometer backscattered signal """ positiveBeta = self.ceilo.beta_raw.where(self.ceilo.beta_raw > 0) tmpCeiloHgt = positiveBeta.copy() tmpValues = tmpCeiloHgt.values tmpValues[np.isfinite(tmpValues)] = 1 tmpCeiloHgt.values = tmpValues tmpCeiloHgt = tmpCeiloHgt * self.ceilo.range lowestBeta = self.noiseFreeBeta.where(tmpCeiloHgt < 4e3) tmpCeiloHgt = tmpCeiloHgt.where(np.isfinite(lowestBeta)) self.interfHeight = tmpCeiloHgt.max(dim="range") self.interfHeight = self.interfHeight.rolling( time=7, center=True, min_periods=5 ).mean() return self def getInterpInterfHeight(self): """ It interpolates the noise height interface to the same temporal resolution from the windcube data """ self.interpInterfHeight = self.interfHeight.interp( time=self.lidar.dataTransf.time ) self.interpInterfHeight90 = self.interfHeight.interp( time=self.lidar.dataTransf90.time ) return self def removeCloud(self): """ It removes from the windcube's observation all data above the noise height interface """ tmpHeight = self.lidar.dataTransf.copy() tmpValues = tmpHeight.values tmpValues[np.isfinite(tmpValues)] = 1 tmpHeight.values = tmpValues tmpHeight = tmpHeight * self.lidar.dataTransf.range self.lidar.dataTransf = self.lidar.dataTransf.where( tmpHeight < self.interpInterfHeight ) tmpHeight = self.lidar.dataTransf90.copy() tmpValues = tmpHeight.values tmpValues[np.isfinite(tmpValues)] = 1 tmpHeight.values = tmpValues tmpHeight = tmpHeight * self.lidar.dataTransf90.range90 self.lidar.dataTransf90 = self.lidar.dataTransf90.where( tmpHeight < self.interpInterfHeight90 ) self.lidar.relative_beta90 = self.lidar.relative_beta90.where( tmpHeight < self.interpInterfHeight90 ) return self