Source code for aqua_fetch.rr._waterbenchiowa


import os
from typing import List, Union, Dict

import pandas as pd

from .camels import Camels
from ..utils import check_attributes

from ._map import (
    catchment_area,
    gauge_latitude,
    gauge_longitude,
    slope
    )


[docs] class WaterBenchIowa(Camels): """ Rainfall run-off dataset for Iowa (US) following the work of `Demir et al., 2022 <https://doi.org/10.5194/essd-14-5605-2022>`_ This is hourly dataset of 125 catchments with 7 static features and 3 dyanmic features (pcp, et, discharge) for each catchment. The dyanmic features are timeseries from 2011-10-01 12:00 to 2018-09-30 11:00. Examples -------- >>> from water_datasets import WaterBenchIowa >>> ds = WaterBenchIowa() ... # fetch static and dynamic features of 5 stations >>> data = ds.fetch(5, as_dataframe=True) >>> data.shape # it is a multi-indexed DataFrame (184032, 5) ... # fetch both static and dynamic features of 5 stations >>> data = ds.fetch(5, static_features="all", as_dataframe=True) >>> data.keys() dict_keys(['dynamic', 'static']) >>> data['static'].shape (5, 7) >>> data['dynamic'] # returns a xarray DataSet ... # using another method >>> data = ds.fetch_dynamic_features('644', as_dataframe=True) >>> data.unstack().shape (61344, 3) # when we get both static and dynamic data, the returned data is a dictionary # with ``static`` and ``dyanic`` keys. >>> data = ds.fetch(stations='644', static_features="all", as_dataframe=True) >>> data['static'].shape, data['dynamic'].shape >>> ((1, 7), (184032, 1)) """ url = "https://zenodo.org/record/7087806#.Y6rW-BVByUk"
[docs] def __init__(self, path=None, **kwargs): super(WaterBenchIowa, self).__init__(path=path, **kwargs) self._download() self._maybe_to_netcdf('WaterBenchIowa.nc')
@property def static_map(self) -> Dict[str, str]: return { 'area': catchment_area(), 'slope': slope('perc'), } @property def dyn_map(self): return { 'discharge': 'obs_q_mmd', 'precipitation': 'pcp_mm', } def stations(self)->List[str]: return [fname.split('_')[0] for fname in os.listdir(self.ts_path) if fname.endswith('.csv')] @property def ts_path(self)->str: return os.path.join(self.path, 'data_time_series', 'data_time_series') @property def dynamic_features(self) -> List[str]: return ['precipitation', 'et', 'discharge'] @property def static_features(self)->List[str]: return ['travel_time', 'area', 'slope', 'loam', 'silt', 'sandy_clay_loam', 'silty_clay_loam'] @property def _area(self)->str: return 'area'
[docs] def fetch_station_attributes( self, station: str, dynamic_features: Union[str, list, None] = 'all', static_features: Union[str, list, None] = None, as_ts: bool = False, st: Union[str, None] = None, en: Union[str, None] = None, **kwargs ) -> pd.DataFrame: """ Examples -------- >>> from water_datasets import WaterBenchIowa >>> dataset = WaterBenchIowa() >>> data = dataset.fetch_station_attributes('666') """ check_attributes(dynamic_features, self.dynamic_features) fname = os.path.join(self.ts_path, f"{station}_data.csv") df = pd.read_csv(fname) df.index = pd.to_datetime(df.pop('datetime')) return df
[docs] def fetch_static_features( self, stn_id: Union[str, List[str]], static_features:Union[str, List[str]] = "all" )->pd.DataFrame: """ Parameters ---------- stn_id : str name/id of station of which to extract the data static_features : list/str, optional (default="all") The name/names of features to fetch. By default, all available static features are returned. Examples --------- >>> from water_datasets import WaterBenchIowa >>> dataset = WaterBenchIowa() get the names of stations >>> stns = dataset.stations() >>> len(stns) 125 get all static data of all stations >>> static_data = dataset.fetch_static_features(stns) >>> static_data.shape (125, 7) get static data of one station only >>> static_data = dataset.fetch_static_features('592') >>> static_data.shape (1, 7) get the names of static features >>> dataset.static_features get only selected features of all stations >>> static_data = dataset.fetch_static_features(stns, ['slope', 'area']) >>> static_data.shape (125, 2) >>> data = dataset.fetch_static_features('592', static_features=['slope', 'area']) >>> data.shape (1, 2) """ stn_id = check_attributes(stn_id, self.stations()) features = check_attributes(static_features, self.static_features, 'static_features') dfs = [] for stn in stn_id: fname = os.path.join(self.ts_path, f"{stn}_data.csv") df = pd.read_csv(fname, nrows=1) dfs.append(df[features]) return pd.concat(dfs)
def _read_dynamic_from_csv( self, stations, dynamic_features, st=None, en=None)->dict: dyn = dict() for stn in stations: fname = os.path.join(self.ts_path, f"{stn}_data.csv") df = pd.read_csv(fname) df.index = pd.to_datetime(df.pop('datetime')) dyn[stn] = df[self.dynamic_features] return dyn @property def start(self): return "20111001 12:00" @property def end(self): return "20180930 11:00"