# Source code for aqua_fetch.rr._waterbenchiowa


import os
from typing import List, Union, Dict

import pandas as pd

from .camels import Camels
from ..utils import check_attributes

from ._map import (
    catchment_area,
    gauge_latitude,
    gauge_longitude,
    slope
    )



# [docs]
class WaterBenchIowa(Camels):
    """
    Rainfall run-off dataset for Iowa (US) following the work of
    `Demir et al., 2022 <https://doi.org/10.5194/essd-14-5605-2022>`_ .
    This is an hourly dataset of 125 catchments with
    7 static features and 3 dynamic features (pcp, et, discharge) for each catchment.
    The dynamic features are timeseries from 2011-10-01 12:00 to 2018-09-30 11:00.

    Every station is stored in its own ``<station>_data.csv`` file inside
    :attr:`ts_path`; both the dynamic and the static features are read from
    that file.

    Examples
    --------
    >>> from water_datasets import WaterBenchIowa
    >>> ds = WaterBenchIowa()
    ... # fetch the data of 5 stations
    >>> data = ds.fetch(5, as_dataframe=True)
    >>> data.shape  # it is a multi-indexed DataFrame
    (184032, 5)
    ... # fetch both static and dynamic features of 5 stations
    >>> data = ds.fetch(5, static_features="all", as_dataframe=True)
    >>> data.keys()
    dict_keys(['dynamic', 'static'])
    >>> data['static'].shape
    (5, 7)
    >>> data['dynamic']  # returns a xarray DataSet
    ... # using another method
    >>> data = ds.fetch_dynamic_features('644', as_dataframe=True)
    >>> data.unstack().shape
    (61344, 3)
    ... # when we get both static and dynamic data, the returned data is a dictionary
    ... # with ``static`` and ``dynamic`` keys.
    >>> data = ds.fetch(stations='644', static_features="all", as_dataframe=True)
    >>> data['static'].shape, data['dynamic'].shape
    ((1, 7), (184032, 1))
    """
    url = "https://zenodo.org/record/7087806#.Y6rW-BVByUk"

    def __init__(self, path=None, **kwargs):
        """
        Parameters
        ----------
            path :
                forwarded unchanged to the parent class initializer.
            **kwargs :
                any further keyword arguments for the parent class initializer.
        """
        super().__init__(path=path, **kwargs)

        # NOTE(review): ``_download`` and ``_maybe_to_netcdf`` are not defined
        # in this class; presumably inherited from ``Camels`` -- confirm.
        self._download()

        self._maybe_to_netcdf('WaterBenchIowa.nc')

    @property
    def static_map(self) -> Dict[str, str]:
        """Mapping from the ``area`` and ``slope`` static features of this
        dataset to the names produced by the helpers of ``._map``."""
        return {
            'area': catchment_area(),
            'slope': slope('perc'),
        }

    @property
    def dyn_map(self) -> Dict[str, str]:
        """Mapping from the ``discharge`` and ``precipitation`` dynamic
        features of this dataset to ``obs_q_mmd`` and ``pcp_mm``."""
        # NOTE(review): this mapping is not applied anywhere in this class;
        # presumably it is consumed by the parent class -- confirm.
        return {
            'discharge': 'obs_q_mmd',
            'precipitation': 'pcp_mm',
        }

    def stations(self) -> List[str]:
        """
        Returns the ids of all stations as a sorted list.

        The id of a station is the part of the file name before the first
        underscore, for every ``.csv`` file found in :attr:`ts_path`.
        """
        # os.listdir returns entries in arbitrary order, therefore the ids
        # are sorted to get the same station order on every platform/run.
        return sorted(
            fname.split('_')[0]
            for fname in os.listdir(self.ts_path)
            if fname.endswith('.csv')
        )

    @property
    def ts_path(self) -> str:
        """Path of the folder which contains the per-station csv files."""
        return os.path.join(self.path, 'data_time_series', 'data_time_series')

    @property
    def dynamic_features(self) -> List[str]:
        """Names of the dynamic (time series) features."""
        return ['precipitation', 'et', 'discharge']

    @property
    def static_features(self) -> List[str]:
        """Names of the static features."""
        return ['travel_time', 'area', 'slope', 'loam', 'silt',
                'sandy_clay_loam', 'silty_clay_loam']

    @property
    def _area(self) -> str:
        """Name of the area feature (one of :attr:`static_features`)."""
        return 'area'

    def fetch_station_attributes(
            self,
            station: str,
            dynamic_features: Union[str, list, None] = 'all',
            static_features: Union[str, list, None] = None,
            as_ts: bool = False,
            st: Union[str, None] = None,
            en: Union[str, None] = None,
            **kwargs
    ) -> pd.DataFrame:
        """
        Reads the complete data file of a single station.

        Parameters
        ----------
            station : str
                name/id of station of which to read the data
            dynamic_features : list/str, optional (default="all")
                validated against :attr:`dynamic_features`. The returned
                DataFrame is *not* subset by this argument, it always
                contains every column of the station file.
            static_features : list/str, optional
                accepted for interface compatibility, not used.
            as_ts : bool
                accepted for interface compatibility, not used.
            st : str, optional
                if given, rows before this time are dropped.
            en : str, optional
                if given, rows after this time are dropped.
            **kwargs :
                accepted for interface compatibility, not used.

        Returns
        -------
        pd.DataFrame
            all columns of the station file except ``datetime``, which is
            parsed and used as index.

        Examples
        --------
            >>> from water_datasets import WaterBenchIowa
            >>> dataset = WaterBenchIowa()
            >>> data = dataset.fetch_station_attributes('666')
        """
        check_attributes(dynamic_features, self.dynamic_features)
        fname = os.path.join(self.ts_path, f"{station}_data.csv")
        df = pd.read_csv(fname)
        df.index = pd.to_datetime(df.pop('datetime'))

        # with both limits missing the frame is returned untouched, which
        # is what this method always did before st/en were honoured
        if st is not None or en is not None:
            df = df.loc[st:en]

        return df

    def fetch_static_features(
            self,
            stn_id: Union[str, List[str]],
            static_features: Union[str, List[str]] = "all"
    ) -> pd.DataFrame:
        """
        Returns the static features of one or more stations.

        Parameters
        ----------
            stn_id : str/list
                name/id of station(s) of which to extract the data
            static_features : list/str, optional (default="all")
                The name/names of features to fetch. By default, all available
                static features are returned.

        Returns
        -------
        pd.DataFrame
            one row per station (labelled by the station id) and one column
            per requested static feature.

        Examples
        ---------
        >>> from water_datasets import WaterBenchIowa
        >>> dataset = WaterBenchIowa()
        ... # get the names of stations
        >>> stns = dataset.stations()
        >>> len(stns)
        125
        ... # get all static data of all stations
        >>> static_data = dataset.fetch_static_features(stns)
        >>> static_data.shape
        (125, 7)
        ... # get static data of one station only
        >>> static_data = dataset.fetch_static_features('592')
        >>> static_data.shape
        (1, 7)
        ... # get the names of static features
        >>> dataset.static_features
        ... # get only selected features of all stations
        >>> static_data = dataset.fetch_static_features(stns, ['slope', 'area'])
        >>> static_data.shape
        (125, 2)
        >>> data = dataset.fetch_static_features('592', static_features=['slope', 'area'])
        >>> data.shape
        (1, 2)

        """
        stn_id = check_attributes(stn_id, self.stations())

        features = check_attributes(static_features, self.static_features, 'static_features')

        rows = []
        for stn in stn_id:
            fname = os.path.join(self.ts_path, f"{stn}_data.csv")
            # the static features are columns of the station file and only
            # its first data row is read to get them
            row = pd.read_csv(fname, nrows=1)[features]
            # label the row with the station id; otherwise every row of the
            # concatenated frame would carry the same index value (0)
            row.index = [stn] * len(row)
            rows.append(row)

        if not rows:
            # pd.concat raises on an empty list of frames
            return pd.DataFrame(columns=features)

        return pd.concat(rows)

    def _read_dynamic_from_csv(
            self,
            stations,
            dynamic_features,
            st=None,
            en=None) -> dict:
        """
        Reads the dynamic features of the given stations from csv files.

        Parameters
        ----------
            stations :
                iterable of station names/ids
            dynamic_features : list/str
                name/names of dynamic features to read. ``None`` and "all"
                mean all the features in :attr:`dynamic_features`.
            st : optional
                if given, rows before this time are dropped.
            en : optional
                if given, rows after this time are dropped.

        Returns
        -------
        dict
            station id -> DataFrame with a datetime index and one column
            for each requested dynamic feature.
        """
        if dynamic_features is None:
            features = self.dynamic_features
        else:
            features = check_attributes(
                dynamic_features, self.dynamic_features, 'dynamic_features')

        slice_time = st is not None or en is not None

        dyn = dict()
        for stn in stations:
            fname = os.path.join(self.ts_path, f"{stn}_data.csv")
            df = pd.read_csv(fname)
            df.index = pd.to_datetime(df.pop('datetime'))
            df = df[features]
            if slice_time:
                df = df.loc[st:en]
            dyn[stn] = df
        return dyn

    @property
    def start(self):
        """Time of the first step of the dynamic features."""
        return "20111001 12:00"

    @property
    def end(self):
        """Time of the last step of the dynamic features."""
        return "20180930 11:00"