"""
Rainfall Runoff datasets
"""
# ExtendinG SUb-DAily River Discharge data over INdia (GUARDIAN)
# https://springernature.figshare.com/articles/dataset/ExtendinG_SUb-DAily_River_Discharge_data_over_INdia_GUARDIAN_/27004282
import os
from typing import Union, List
import pandas as pd
from .._backend import plt, plt_Axes
from .camels import Camels
from ._camels import CAMELS_AUS
from ._camels import CAMELS_CL
from ._camels import CAMELS_GB
from ._camels import CAMELS_US
from ._lamah import LamaHCE
from ._brazil import CAMELS_BR
from ._brazil import CABra
from ._hysets import HYSETS
from ._hype import HYPE
from ._camels import CAMELS_DK
from ._waterbenchiowa import WaterBenchIowa
from ._gsha import GSHA
from ._ccam import CCAM
from ._rrluleasweden import RRLuleaSweden
from ._camels import CAMELS_CH
from ._lamah import LamaHIce
from ._camels import CAMELS_DE
from ._grdccaravan import GRDCCaravan
from ._camels import CAMELS_SE
from ._simbi import Simbi
from ._denmark import Caravan_DK
from ._bull import Bull
from ._camels import CAMELS_IND
from ._gsha import Arcticnet
from ._usgs import USGS
from ._estreams import EStreams
from ._gsha import Japan
from ._gsha import Thailand
from ._gsha import Spain
from ._estreams import Ireland
from ._estreams import Finland
from ._estreams import Finland
from ._estreams import Poland
from ._estreams import Italy
from ._camels import CAMELS_FR
from ._estreams import Portugal
# following are not available with RainfallRunoff class yet
from ._npctr import NPCTRCatchments
from .mtropics import MtropicsLaos
from .mtropics import MtropcsThailand
from .mtropics import MtropicsVietnam
DATASETS = {
"camels": Camels,
"CAMELS_AUS": CAMELS_AUS,
"CAMELS_CL": CAMELS_CL,
"CAMELS_GB": CAMELS_GB,
"CAMELS_US": CAMELS_US,
"LamaHCE": LamaHCE,
"CAMELS_BR": CAMELS_BR,
"CABra": CABra,
"HYSETS": HYSETS,
"HYPE": HYPE,
"CAMELS_DK": CAMELS_DK,
"WaterBenchIowa": WaterBenchIowa,
"GSHA": GSHA,
"EStreams": EStreams,
"CCAM": CCAM,
"RRLuleaSweden": RRLuleaSweden,
"CAMELS_CH": CAMELS_CH,
"LamaHIce": LamaHIce,
"CAMELS_DE": CAMELS_DE,
"GRDCCaravan": GRDCCaravan,
"CAMELS_SE": CAMELS_SE,
"Simbi": Simbi,
"Caravan_DK": Caravan_DK,
"Bull": Bull,
"CAMELS_IND": CAMELS_IND,
"USGS": USGS,
"Arcticnet": Arcticnet,
'Japan': Japan,
'Spain': Spain,
'Thailand': Thailand,
'Ireland': Ireland,
'Finland': Finland,
'Poland': Poland,
'Italy': Italy,
'CAMELS_FR': CAMELS_FR,
'Portugal': Portugal,
}
"""
.. list-table:: Naming Convention for dynamic features
:widths: 20 30
:header-rows: 1
* - Feature Name
- Description
* - obs_q_cms
- observed streamflow in cms
* - obs_q_mmd
- observed streamflow in mm/day
* - pcp_mm
- precipitation in mm
* - max_temp_C
- maximum air temperature in degree celcius
* - min_temp_C
- minimum air temperature in degree celcius
* - mean_temp_C
- mean temperature in degree celcius
* - method_pet_mm
- potential evapotranspiration in mm
* - et_mm
- evapotranspiration in mm
* - rh_%
- relative humidity in percentage
* - min_rh_%
- minimum relative humidity
* - max_rh_%
- maximum relative humidity
* - swe_mm
- snow water equivalent
* - solrad_wm2
- solar radiation watt per meter square
* - windspeed_ms
- wind speed in meter per second
* - sim_q_cms
- simulated streamflow in cms
* - dwn_lw_rad_wm2
- downward long wave radiation in watt per meter square
* - dwn_sw_rad_wm2
- downward short wave radiation in watt per meter square
* - airpres_hpa
- Mean air pressure at sea level in hectopascal
.. list-table:: Naming Convention for static features
:widths: 20 30
:header-rows: 1
* - Feature Name
- Description
* - guage_lat
- Latitude of the guage station
* - guage_long
- Longitude of the guage station
* - area_km2
- catchment area in km2
* - mean_elev
- mean elevation in meters
"""
[docs]
class RainfallRunoff(object):
"""
This class provides access to all the rainfall-runoff
datasets. For simiplity and resusability, use this class
instead of using the individual dataset classes.
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS') # instead of CAMELS_AUS, you can provide any other dataset name
>>> df = dataset.fetch(stations=1, as_dataframe=True)
>>> df = df.unstack() # the returned dataframe is a multi-indexed dataframe so we have to unstack it
>>> df.columns = df.columns.get_level_values('dynamic_features')
>>> df.shape
(21184, 26)
... # get name of all stations as list
>>> stns = dataset.stations()
>>> len(stns)
222
... # get data of 10 % of stations as dataframe
>>> df = dataset.fetch(0.1, as_dataframe=True)
>>> df.shape
(550784, 22)
... # The returned dataframe is a multi-indexed data
>>> df.index.names == ['time', 'dynamic_features']
True
... # get data by station id
>>> df = dataset.fetch(stations='224214A', as_dataframe=True).unstack()
>>> df.shape
(21184, 26)
... # get names of available dynamic features
>>> dataset.dynamic_features
... # get only selected dynamic features
>>> data = dataset.fetch(1, as_dataframe=True,
... dynamic_features=['tmax_AWAP', 'precipitation_AWAP', 'et_morton_actual_SILO', 'streamflow_MLd']).unstack()
>>> data.shape
(21184, 4)
... # get names of available static features
>>> dataset.static_features
... # get data of 10 random stations
>>> df = dataset.fetch(10, as_dataframe=True)
>>> df.shape # remember this is a multiindexed dataframe
(21184, 260)
# when we get both static and dynamic data, the returned data is a dictionary
# with ``static`` and ``dyanic`` keys.
>>> data = dataset.fetch(stations='224214A', static_features="all", as_dataframe=True)
>>> data['static'].shape, data['dynamic'].shape
((1, 166), (550784, 1))
>>> coords = dataset.stn_coords() # returns coordinates of all stations
>>> coords.shape
(472, 2)
>>> dataset.stn_coords('3001') # returns coordinates of station whose id is 3001
18.3861 80.3917
>>> dataset.stn_coords(['3001', '17021']) # returns coordinates of two stations
See :ref:`sphx_glr_auto_examples_camels_australia.py` for more comprehensive usage example.
"""
[docs]
def __init__(
self,
dataset: str,
path: Union[str, os.PathLike] = None,
overwrite: bool = False,
to_netcdf: bool = True,
processes: int = None,
remove_zip: bool = True,
verbosity: int = 1,
**kwargs
):
"""
Rainfall Runoff datasets
Parameters
----------
dataset: str
dataset name. This must be one of the following:
- ``Arcticnet``
- ``Bull``
- ``CABra``
- ``CCAM``
- ``CAMELS_AUS``
- ``CAMELS_BR``
- ``CAMELS_CH``
- ``CAMELS_CL``
- ``CAMELS_DE``
- ``CAMELS_DK0``
- ``CAMELS_DK``
- ``CAMELS_FR``
- ``CAMELS_GB``
- ``CAMELS_IND``
- ``CAMELS_SE``
- ``CAMELS_US``
- ``EStreams``
- ``Finland``
- ``GRDCCaravan``
- ``GSHA``
- ``HYSETS``
- ``HYPE``
- ``Ireland``
- ``Italy``
- ``Japan``
- ``LamaHCE``
- ``LamaHIce``
- ``Poland``
- ``Portugal``
- ``RRLuleaSweden``
- ``Simbi``
- ``Spain``
- ``Thailand``
- ``USGS``
- ``WaterBenchIowa``
path : str
path to directory inside which data is located/downloaded.
If provided and the path/dataset exists, then the data will be read
from this path. If provided and the path/dataset does not exist,
then the data will be downloaded at this path. If not provided,
then the data will be downloaded in the default path which is
``.../water-datasts/data/``.
overwrite : bool
If the data is already downloaded then you can set it to True,
to make a fresh download.
to_netcdf : bool
whether to convert all the data into one netcdf file or not.
This will fasten repeated calls to fetch etc but will
require netcdf5 package as well as xarray.
verbosity : int
0: no message will be printed
kwargs :
additional keyword arguments for the underlying dataset class
For example ``version`` for :py:class:`water_quality.rr.CAMELS_AUS` or ``timestep`` for
:py:class:`water_quality.rr.LamaHCE` dataset or ``met_src`` for ``CAMELS_BR``
"""
if dataset not in DATASETS:
raise ValueError(f"Dataset {dataset} not available")
self.dataset = DATASETS[dataset](
path=path,
overwrite=overwrite,
to_netcdf=to_netcdf,
processes=processes,
remove_zip=remove_zip,
verbosity=verbosity,
**kwargs
)
def __str__(self):
return f"{self.name} with {len(self.stations())} stations, {self.num_dynamic()} dynamic and {self.num_static()} static features"
def __len__(self):
return len(self.stations())
[docs]
def num_dynamic(self) -> int:
"""number of dynamic features associated with the dataset"""
return len(self.dynamic_features)
[docs]
def num_static(self) -> int:
"""number of static features associated with the dataset"""
return len(self.static_features)
@property
def name(self) -> str:
"""
returns name of dataset
"""
return self.dataset.name
@property
def path(self) -> str:
"""
returns path where the data is stored. The default path is
~../water_quality/data
"""
return self.dataset.path
@property
def static_features(self) -> List[str]:
"""
returns names of static features as python list of strings
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
>>> dataset.static_features
"""
return self.dataset.static_features
@property
def dynamic_features(self) -> List[str]:
"""
returns names of dynamic features as python list of strings
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
>>> dataset.dynamic_features
"""
return self.dataset.dynamic_features
[docs]
def fetch_static_features(
self,
stations: Union[str, list] = "all",
static_features: Union[str, list] = "all"
) -> pd.DataFrame:
"""Fetches all or selected static attributes of one or more stations.
Parameters
----------
stations : str
name/id of station of which to extract the data
features : list/str, optional (default="all")
The name/names of features to fetch. By default, all available
static features are returned.
Returns
-------
pd.DataFrame
a pandas dataframe
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> camels = RainfallRunoff('CAMELS_AUS')
>>> camels.fetch_static_features('224214A')
>>> camels.static_features
>>> camels.fetch_static_features('224214A',
... features=['elev_mean', 'relief', 'ksat', 'pop_mean'])
"""
return self.dataset.fetch_static_features(stations, static_features)
[docs]
def area(
self,
stations: Union[str, List[str]] = "all"
) -> pd.Series:
"""
Returns area (Km2) of all/selected catchments as pandas series
parameters
----------
stations : str/list (default=``all``)
name/names of stations. Default is ``all``, which will return
area of all stations
Returns
--------
pd.Series
a pandas series whose indices are catchment ids and values
are areas of corresponding catchments.
Examples
---------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_CH')
>>> dataset.area() # returns area of all stations
>>> dataset.area('2004') # returns area of station whose id is 2004
>>> dataset.area(['2004', '6004']) # returns area of two stations
"""
return self.dataset.area(stations)
[docs]
def fetch(self,
stations: Union[str, List[str], int, float] = "all",
dynamic_features: Union[List[str], str, None] = 'all',
static_features: Union[str, List[str], None] = None,
st: Union[None, str] = None,
en: Union[None, str] = None,
as_dataframe: bool = False,
**kwargs # todo, where do these keyword args go?
) -> Union[dict, pd.DataFrame]:
"""
Fetches the features of one or more stations.
parameters
----------
stations :
It can have following values:
- int : number of (randomly selected) stations to fetch
- float : fraction of (randomly selected) stations to fetch
- str : name/id of station to fetch. However, if ``all`` is
provided, then all stations will be fetched.
- list : list of names/ids of stations to fetch
dynamic_features : (default=``all``)
It can have following values:
- str : name of dynamic feature to fetch. If ``all`` is
provided, then all dynamic features will be fetched.
- list : list of dynamic features to fetch.
- None : No dynamic feature will be fetched.
static_features : (default=None)
It can have following values:
- str : name of static feature to fetch. If ``all`` is
provided, then all static features will be fetched.
- list : list of static features to fetch.
- None : No static feature will be fetched.
st :
starting date of data to be returned. If None, the data will be
returned from where it is available.
en :
end date of data to be returned. If None, then the data will be
returned till the date data is available.
as_dataframe :
whether to return dynamic attributes as pandas
dataframe or as xarray dataset.
kwargs :
keyword arguments
returns
-------
If both static and dynamic features are obtained then it returns a
dictionary whose keys are station/gauge_ids and values are the
attributes and dataframes.
Otherwise either dynamic or static features are returned.
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
>>> # get data of 10% of stations
>>> df = dataset.fetch(stations=0.1, as_dataframe=True) # returns a multiindex dataframe
... # fetch data of 5 (randomly selected) stations
>>> five_random_stn_data = dataset.fetch(stations=5, as_dataframe=True)
... # fetch data of 3 selected stations
>>> three_selec_stn_data = dataset.fetch(stations=['912101A','912105A','915011A'], as_dataframe=True)
... # fetch data of a single stations
>>> single_stn_data = dataset.fetch(stations='318076', as_dataframe=True)
... # get both static and dynamic features as dictionary
>>> data = dataset.fetch(1, static_features="all", as_dataframe=True) # -> dict
>>> data['dynamic']
... # get only selected dynamic features
>>> sel_dyn_features = dataset.fetch(stations='318076',
... dynamic_features=['streamflow_MLd', 'solarrad_AWAP'], as_dataframe=True)
... # fetch data between selected periods
>>> data = dataset.fetch(stations='318076', st="20010101", en="20101231", as_dataframe=True)
"""
return self.dataset.fetch(stations, dynamic_features, static_features, st, en, as_dataframe, **kwargs)
[docs]
def fetch_stations_features(
self,
stations: Union[str, List[str]],
dynamic_features: Union[str, List[str], None] = 'all',
static_features: Union[str, List[str], None] = None,
st=None,
en=None,
as_dataframe: bool = False,
**kwargs
):
"""
Reads attributes of more than one stations.
parameters
----------
stations :
list of stations for which data is to be fetched.
dynamic_features :
list of dynamic features to be fetched.
if 'all', then all dynamic features will be fetched.
static_features :
list of static features to be fetched.
If `all`, then all static features will be fetched. If None,
then no static attribute will be fetched.
st :
start of data to be fetched.
en :
end of data to be fetched.
as_dataframe : whether to return the data as pandas dataframe. default
is xr.Dataset object
kwargs dict:
additional keyword arguments
Returns
-------
pd.DataFrame or xr.Dataset or dict
Dynamic and static features of one or multiple stations. Dynamic features
are by default returned as xr.Dataset unless `as_dataframe` is True or xarray is not installed, in
such a case, it is a pandas dataframe with multiindex. If xr.Dataset,
it consists of `data_vars` equal to number of stations and for each
station, the `DataArray` is of dimensions (time, dynamic_features).
where `time` is defined by `st` and `en` i.e. length of `DataArray`.
In case, when the returned object is pandas DataFrame, the first index
is `time` and second index is `dyanamic_features`. Static attributes
are always returned as pandas DataFrame and have following shape
`(stations, static_features). If `dynamic_features` is None,
then they are not returned and the returned value only consists of
static features. Same holds true for `static_features`.
If both are not None, then the returned type is a dictionary with
`static` and `dynamic` keys.
Raises
------
ValueError, if both ``dynamic_features`` and ``static_features`` are None
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
... # find out station ids
>>> dataset.stations()
... # get data of selected stations
>>> dataset.fetch_stations_features(['912101A', '912105A', '915011A'],
... as_dataframe=True)
"""
return self.dataset.fetch_stations_features(stations, dynamic_features, static_features, st, en, as_dataframe,
**kwargs)
[docs]
def fetch_dynamic_features(
self,
stn_id: str,
dynamic_features='all',
st=None,
en=None,
as_dataframe=False
):
"""Fetches all or selected dynamic attributes of one station.
Parameters
----------
stn_id : str
name/id of station of which to extract the data
features : list/str, optional (default="all")
The name/names of features to fetch. By default, all available
dynamic features are returned.
st : Optional (default=None)
start time from where to fetch the data.
en : Optional (default=None)
end time untill where to fetch the data
as_dataframe : bool, optional (default=False)
if true, the returned data is pandas DataFrame otherwise it
is xarray dataset
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> camels = RainfallRunoff('CAMELS_AUS')
>>> camels.fetch_dynamic_features('224214A', as_dataframe=True).unstack()
>>> camels.dynamic_features
>>> camels.fetch_dynamic_features('224214A',
... features=['tmax_AWAP', 'vprp_AWAP', 'streamflow_mmd'],
... as_dataframe=True).unstack()
"""
return self.dataset.fetch_dynamic_features(
stn_id, dynamic_features, st, en, as_dataframe)
[docs]
def fetch_station_features(
self,
stn_id: str,
dynamic_features: Union[str, list, None] = 'all',
static_features: Union[str, list, None] = None,
as_ts: bool = False,
st: Union[str, None] = None,
en: Union[str, None] = None,
**kwargs
) -> pd.DataFrame:
"""
Fetches features for one station.
Parameters
-----------
station :
station id/gauge id for which the data is to be fetched.
dynamic_features : str/list, optional
names of dynamic features/attributes to fetch
static_features :
names of static features/attributes to be fetches
as_ts : bool
whether static features are to be converted into a time
series or not. If yes then the returned time series will be of
same length as that of dynamic attribtues.
st : str,optional
starting point from which the data to be fetched. By default,
the data will be fetched from where it is available.
en : str, optional
end point of data to be fetched. By default the dat will be fetched
Returns
-------
pd.DataFrame
dataframe if as_ts is True else it returns a dictionary of static and
dynamic features for a station/gauge_id
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
>>> dataset.fetch_station_features('912101A')
"""
return self.dataset.fetch_station_features(stn_id, dynamic_features, static_features, as_ts, st, en, **kwargs)
[docs]
def plot_stations(
self,
stations: List[str] = 'all',
marker='.',
ax: plt_Axes = None,
show: bool = True,
**kwargs
) -> plt_Axes:
"""
plots coordinates of stations
Parameters
----------
stations :
name/names of stations. If not given, all stations will be plotted
marker :
marker to use.
ax : plt.Axes
matplotlib axes to draw the plot. If not given, then
new axes will be created.
show : bool
**kwargs
Returns
-------
plt.Axes
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
>>> dataset.plot_stations()
>>> dataset.plot_stations(['1', '2', '3'])
>>> dataset.plot_stations(marker='o', ms=0.3)
>>> ax = dataset.plot_stations(marker='o', ms=0.3, show=False)
>>> ax.set_title("Stations")
>>> plt.show()
"""
return self.dataset.plot_stations(stations, marker, ax, show, **kwargs)
[docs]
def q_mmd(
self,
stations: Union[str, List[str]] = 'all'
) -> pd.DataFrame:
"""
returns streamflow in the units of milimeter per day. This is obtained
by diving ``q``/area
parameters
----------
stations : str/list
name/names of stations. Default is ``all``, which will return
area of all stations
Returns
--------
pd.DataFrame
a pandas DataFrame whose indices are time-steps and columns
are catchment/station ids.
"""
return self.dataset.q_mmd(stations)
[docs]
def stn_coords(
self,
stations: Union[str, List[str]] = "all"
) -> pd.DataFrame:
"""
returns coordinates of stations as DataFrame
with ``long`` and ``lat`` as columns.
Parameters
----------
stations :
name/names of stations. If not given, coordinates
of all stations will be returned.
Returns
-------
coords :
pandas DataFrame with ``long`` and ``lat`` columns.
The length of dataframe will be equal to number of stations
wholse coordinates are to be fetched.
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_CH')
>>> dataset.stn_coords() # returns coordinates of all stations
>>> dataset.stn_coords('2004') # returns coordinates of station whose id is 2004
>>> dataset.stn_coords(['2004', '6004']) # returns coordinates of two stations
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
>>> dataset.stn_coords() # returns coordinates of all stations
>>> dataset.stn_coords('912101A') # returns coordinates of station whose id is 912101A
>>> dataset.stn_coords(['G0050115', '912101A']) # returns coordinates of two stations
"""
return self.dataset.stn_coords(stations)
[docs]
def get_boundary(
self,
stn_id: str,
as_type: str = 'numpy'
):
"""
returns boundary of a catchment in a required format
Parameters
----------
stn_id : str
name/id of catchment
as_type : str
'numpy' or 'geopandas'
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_SE')
>>> dataset.get_boundary(dataset.stations()[0])
"""
return self.dataset.get_boundary(stn_id, as_type)
[docs]
def plot_catchment(
self,
stn_id: str,
ax: plt_Axes = None,
show: bool = True,
**kwargs
) -> plt.Axes:
"""
plots catchment boundaries
Parameters
----------
ax : plt.Axes
matplotlib axes to draw the plot. If not given, then
new axes will be created.
show : bool
**kwargs
Returns
-------
plt.Axes
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
>>> dataset.plot_catchment()
>>> dataset.plot_catchment(marker='o', ms=0.3)
>>> ax = dataset.plot_catchment(marker='o', ms=0.3, show=False)
>>> ax.set_title("Catchment Boundaries")
>>> plt.show()
"""
return self.dataset.plot_catchment(stn_id, ax, show, **kwargs)
[docs]
def stations(self) -> List[str]:
"""
returns names of all stations
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
>>> dataset.stations()
"""
return self.dataset.stations()
@property
def start(self) -> str:
"""
returns starting date of data
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
>>> dataset.start()
"""
return self.dataset.start
@property
def end(self) -> str:
"""
returns end date of data
Examples
--------
>>> from water_datasets import RainfallRunoff
>>> dataset = RainfallRunoff('CAMELS_AUS')
>>> dataset.end()
"""
return self.dataset.end