"""Module for keep all filtering functionalities
"""
import pandas as pd
import numpy as np
[docs]class filtering:
"""SNR and Status filter
It uses the carrier to noise ratio (SNR) and status
variables available in the WindCube's data. It is
similar to the filter described in the manual
Parameters
----------
data : xrarray.Dataset
Dataset containing the original WindCube's data
"""
[docs] def __init__(self, data):
self.data = data
def getVerticalObsComp(self, variable, snr=False, status=True):
"""Vertical data filter
It uses the SNR and status variables to filter out
the data from vertical observations
Parameters
----------
variable : str
name of the variable that will be filtered
snr : bool, int, optional
if an interger is given it is used to
as threshold to filter the data based on
the signal to noise ratio
status : bool, optional
if true it filters the data using the status
variable generated by the WindCube's software
Returns
-------
tmpData : xarray.DataSet
an instance of the variable filtered using
SNR or status variable
"""
tmpData = self.data[variable]
if status:
tmpData = tmpData.where(self.data.radial_wind_speed_status90 == 1)
if snr != False:
tmpData = tmpData.where(self.data.cnr90 > snr)
tmpData = tmpData.where(self.data.elevation == 90, drop=True)
return tmpData
def getRadialObsComp(self, variable, azm, snr=False, status=True):
"""Slanted data filter
It uses the SNR and status variables to filter out
the data from slanted observations
Parameters
----------
variable : str
name of the variable that will be filtered
snr : bool, int, optional
if an interger is given it is used to
as threshold to filter the data based on
the signal to noise ratio
status : bool, optional
if true it filters the data using the status
variable generated by the WindCube's software
Returns
-------
tmpaData : xarray.DataSet
an instance of the variable filtered using
SNR or status variable
"""
tmpData = self.data[variable]
if status:
tmpData = tmpData.where(self.data.radial_wind_speed_status == 1)
if snr != False:
tmpData = tmpData.where(self.data.cnr > snr)
tmpData = tmpData.where(
(self.data.elevation != 90) & (self.data.azimuth == azm), drop=True
)
return tmpData
# it removes the STE below cloud layer
[docs]class secondTripEchoFilter:
"""Boundary layer second trip echoes filter
This filter minimises the presence of second
trip echoes (STE). This filter is based on the
standard deviation of the anomaly of the observaions.
It is applicable in regions where there is a contrast
between the real data and the STE.
Parameters
----------
data : object
the object returned from the getRestructuredData
timeCloudMaks : xarray.DataArray
it is a time series for indicating the presence
of clouds above the maximum WinCube range.
1 indicates cloud and 0 indicates no cloud.
(THIS MAKS IS NOT NEEDED NOW)
nProf : int
number of profiles used to calculating the anomaly
center : bool, optional
it defines how the mean value for the anomaly will
be calculated
min_periods : int
minimum number of profiles used for calculating the
mean value
nStd : int
Multiplication factor for defining the size of the
window to keep the data. The filter removes any
anomaly larger than nStd * std
strH : str
starting hour for calculating the anomaly
endH : str
end hour for calculating the anomaly
Returns
-------
object : object
an object containing data filtered for STE
"""
[docs] def __init__(
self,
data,
timeCloudMask,
nProf=500,
center=True,
min_periods=30,
nStd=2,
strH="09",
endH="16",
):
self.lidar = data
# self.timeCloudMask = timeCloudMask
self.nProf = nProf
self.center = center
self.min_periods = min_periods
self.nStd = nStd
self.getTimeEdges()
self.calMeanAndAnomSlant()
self.calMeanAndAnom90()
self.cleaning()
self.cleaning90()
def getTimeEdges(self, strH="09", endH="16"):
"""
It creates the time boundaries for the STD anomaly calculation
Parameters
----------
strH : str
starting hour for calculating the anomaly
endH : str
end hour for calculating the anomaly
"""
selTime = pd.to_datetime(self.lidar.dataTransf.time.values[0])
selTime = selTime.strftime("%Y%m%d")
self.startTime = pd.to_datetime("{0} {1}".format(selTime, strH))
self.endTime = pd.to_datetime("{0} {1}".format(selTime, endH))
def calMeanAndAnomSlant(self):
"""
It calculates the anomaly from the slanted observations
"""
# slanted beam
tmpSelData = self.lidar.dataTransf
self.dataMean = tmpSelData.rolling(
time=self.nProf, center=self.center, min_periods=self.min_periods
).mean()
self.dataAnom = self.lidar.dataTransf - self.dataMean
def calMeanAndAnom90(self):
"""
It calculates the anomaly from the vertical observations
"""
# vertical beam
tmpSelData90 = self.lidar.dataTransf90
self.dataMean90 = tmpSelData90.rolling(
time=self.nProf, center=self.center, min_periods=self.min_periods
).mean()
self.dataAnom90 = self.lidar.dataTransf90 - self.dataMean90
def cleaning(self):
"""
It removes the data that is larger than the nStd * anomaly
from the slanted observations
"""
tmpAnom = self.dataAnom.where(
(self.lidar.dataTransf.time > self.startTime)
& (self.lidar.dataTransf.time < self.endTime)
)
anomStd = tmpAnom.std(dim=["time", "range", "elv"])
# Cross check if this commented part is still needed for
# for filter the slanted profiles
# tmpNoCloud = self.lidar.dataTransf.where(self.timeCloudMask == 0).copy()
# tmpCloud = self.lidar.dataTransf.where(self.timeCloudMask == 1).copy()
# tmpCloud = tmpCloud.where(np.abs(self.dataAnom) < self.nStd * anomStd)
# tmpCleanData = tmpNoCloud.copy()
# tmpCleanData.values[np.isfinite(tmpCloud)] = tmpCloud.values[np.isfinite(tmpCloud)]
tmpCleanData = self.lidar.dataTransf.copy()
tmpCleanData = tmpCleanData.where(
np.abs(self.dataAnom) < self.nStd * anomStd
)
self.lidar.dataTransf.values = tmpCleanData.values
def cleaning90(self):
"""
It removes the data that is larger than the nStd * anomaly
from the vertical observations
"""
tmpAnom = self.dataAnom90.where(
(self.lidar.dataTransf90.time > self.startTime)
& (self.lidar.dataTransf90.time < self.endTime)
)
anomStd = tmpAnom.std(dim=["time", "range90"])
# tmpNoCloud = self.lidar.dataTransf90.where(self.timeCloudMask == 0).copy()
# tmpCloud = self.lidar.dataTransf90.where(self.timeCloudMask == 1).copy()
# tmpCloud = tmpCloud.where(np.abs(self.dataAnom90) < self.nStd * anomStd)
# tmpCleanData = tmpNoCloud.copy()
# tmpCleanData.values[np.isfinite(tmpCloud)] = tmpCloud.values[np.isfinite(tmpCloud)]
tmpCleanData = self.lidar.dataTransf90.copy()
tmpCleanData = tmpCleanData.where(
np.abs(self.dataAnom90) < self.nStd * anomStd
)
self.lidar.dataTransf90.values = tmpCleanData.values
# it removes STE and clouds contamination
# from above the aerosol loaded region
[docs]class windCubeCloudRemoval:
"""Above boundary layer second trip echoes filter
This filter reduces the second trip echoes contamination
and clouds from regions above the boundary layer. It
requires information from the ceilometer.
IT IS STILL EXPERIMENTAL
Parameters
----------
ceilo : xarray.Dataset
A dataset of the CHM15k Nimbus ceilometer observations.
lidar : xarray.Dataset
An instance of the re-structured WindCube dataset
Returns
-------
object : object
an object containing an instance of the noise
height interface and the re-structured dataset
filtered for STE and clouds.
"""
[docs] def __init__(self, ceilo, lidar=None):
self.lidar = lidar
self.ceilo = ceilo
self.getNoiseFreeBeta()
self.getHeightInterface()
if lidar != None:
self.getInterpInterfHeight()
self.removeCloud()
def getNoiseFreeBeta(self):
"""
It removes the noise from the backscattered signal
"""
positiveBeta = self.ceilo.beta_raw.where(self.ceilo.beta_raw > 0)
positiveBeta = positiveBeta.rolling(
range=10, center=True, min_periods=10
).mean()
positiveBeta = positiveBeta.rolling(
time=15, center=True, min_periods=15
).mean()
self.noiseFreeBeta = positiveBeta
return self
def getHeightInterface(self):
"""
It identifies the height of the separation between
the noise and the non-noise data from the ceilometer
backscattered signal
"""
positiveBeta = self.ceilo.beta_raw.where(self.ceilo.beta_raw > 0)
tmpCeiloHgt = positiveBeta.copy()
tmpValues = tmpCeiloHgt.values
tmpValues[np.isfinite(tmpValues)] = 1
tmpCeiloHgt.values = tmpValues
tmpCeiloHgt = tmpCeiloHgt * self.ceilo.range
lowestBeta = self.noiseFreeBeta.where(tmpCeiloHgt < 4e3)
tmpCeiloHgt = tmpCeiloHgt.where(np.isfinite(lowestBeta))
self.interfHeight = tmpCeiloHgt.max(dim="range")
self.interfHeight = self.interfHeight.rolling(
time=7, center=True, min_periods=5
).mean()
return self
def getInterpInterfHeight(self):
"""
It interpolates the noise height interface to the same
temporal resolution from the windcube data
"""
self.interpInterfHeight = self.interfHeight.interp(
time=self.lidar.dataTransf.time
)
self.interpInterfHeight90 = self.interfHeight.interp(
time=self.lidar.dataTransf90.time
)
return self
def removeCloud(self):
"""
It removes from the windcube's observation all
data above the noise height interface
"""
tmpHeight = self.lidar.dataTransf.copy()
tmpValues = tmpHeight.values
tmpValues[np.isfinite(tmpValues)] = 1
tmpHeight.values = tmpValues
tmpHeight = tmpHeight * self.lidar.dataTransf.range
self.lidar.dataTransf = self.lidar.dataTransf.where(
tmpHeight < self.interpInterfHeight
)
tmpHeight = self.lidar.dataTransf90.copy()
tmpValues = tmpHeight.values
tmpValues[np.isfinite(tmpValues)] = 1
tmpHeight.values = tmpValues
tmpHeight = tmpHeight * self.lidar.dataTransf90.range90
self.lidar.dataTransf90 = self.lidar.dataTransf90.where(
tmpHeight < self.interpInterfHeight90
)
self.lidar.relative_beta90 = self.lidar.relative_beta90.where(
tmpHeight < self.interpInterfHeight90
)
return self