Source code for aqua_fetch.wq._grimedb

# names exported by ``from <this module> import *``
__all__ = ["GRiMeDB"]

import os
from typing import Union, List

import pandas as pd
import numpy as np

from .._datasets import Datasets
from ..utils import check_attributes


class GRiMeDB(Datasets):
    """
    Global river database of methane concentrations and fluxes from 5029
    stations of 305 rivers following
    `Stanley et al., 2023 <https://doi.org/10.5194/essd-15-2879-2023>`_

    The data consists of four csv files (``concentrations.csv``,
    ``fluxes.csv``, ``sites.csv`` and ``sources.csv``) which are read
    from ``self.path``.

    Examples
    --------
    >>> from aqua_fetch import GRiMeDB
    >>> ds = GRiMeDB(path='/path/to/dataset')
    >>> ds.stations()
    >>> ds.streams
    >>> ds.stn_coords()
    >>> ds.shape
    5029, 2
    """

    # file name -> link of the corresponding file on the EDI data portal
    url = {
        "concentrations.csv": "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.420.1&entityid=ba3e270bcab8ace5d157c995e4b791e4",
        "fluxes.csv": "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.420.1&entityid=1a559f00566ed9f9f33ccb0daab0bef5",
        "sites.csv": "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.420.1&entityid=3faa64303d5f5bcd043bb88f6768e603",
        "sources.csv": "https://portal.edirepository.org/nis/dataviewer?packageid=knb-lter-ntl.420.1&entityid=3615386d27a2d148be09e70ac22799e4",
    }

    def __init__(self, path=None, **kwargs):
        """
        Parameters
        ----------
        path :
            forwarded to the parent class and also stored as ``ds_dir``.
        **kwargs :
            any further keyword arguments for the parent class.
        """
        super().__init__(path=path, **kwargs)
        self.ds_dir = path
        self._download()
        # ``sites()`` reads Site_ID as str, therefore station IDs are strings
        self._stations = self.sites()['Site_ID'].unique().tolist()

    def stations(self) -> List[str]:
        """returns the unique Site_IDs found in sites.csv"""
        return self._stations

    @property
    def streams(self) -> List[str]:
        """returns names of streams"""
        return self.sites()['Stream_Name'].unique().tolist()

    def stn_coords(self) -> pd.DataFrame:
        """
        Returns the coordinates of all the stations in the dataset in
        wgs84 projection.

        Returns
        -------
        pd.DataFrame
            A dataframe with columns 'lat', 'long'
        """
        coords = self.sites()[['Latitude', 'Longitude']]
        coords = coords.rename(columns={'Latitude': 'lat', 'Longitude': 'long'})
        return coords.astype(np.float32)

    @staticmethod
    def _is_all(selection) -> bool:
        """True only if ``selection`` is the literal string "all"."""
        # ``selection != "all"`` is evaluated elementwise for array-like
        # input and can not be used in a boolean context, hence the
        # explicit type check.
        return isinstance(selection, str) and selection == "all"

    @staticmethod
    def _select_sites(df: pd.DataFrame, site_ids: List[str]) -> pd.DataFrame:
        """returns those rows of ``df`` whose Site_ID is in ``site_ids``"""
        # Station IDs originate from sites.csv where Site_ID is read as
        # str, whereas the dtype of Site_ID in ``df`` is inferred by pandas
        # and may be numeric. ``isin`` does not match 1 with '1', so the
        # comparison is done on the string form. The dtype of the returned
        # column is left untouched.
        wanted = [str(site_id) for site_id in site_ids]
        return df[df['Site_ID'].astype(str).isin(wanted)]

    def concentrations(
            self,
            stations: Union[str, List[str]] = "all",
            streams: Union[str, List[str]] = "all",
            parameters: Union[str, List[str]] = "all"
    ):
        """
        Get concentrations data.

        Parameters
        ----------
        stations : Union[str, List[str]], optional
            station ID or list of station IDs, by default "all". If given,
            then ``streams`` must not be given. Check `.stations()` method
            for available stations.
        streams : Union[str, List[str]], optional
            stream name or list of stream names, by default "all". If given,
            then ``stations`` must not be given. Check `.streams` attribute
            for available streams.
        parameters : Union[str, List[str]], optional
            column name or list of column names of concentrations.csv to
            return, by default "all" which returns all the columns.

        Returns
        -------
        pd.DataFrame
            the (filtered) contents of concentrations.csv. If ``parameters``
            is a single column name, pandas returns a ``pd.Series``.

        Raises
        ------
        ValueError
            if both ``stations`` and ``streams`` are given.
        """
        by_station = not self._is_all(stations)
        by_stream = not self._is_all(streams)

        if by_station and by_stream:
            raise ValueError("Either stations or streams must be provided, not both.")

        fpath = os.path.join(self.path, 'concentrations.csv')
        df = pd.read_csv(
            fpath,
            dtype={'pH': np.float32},
            # converters are taking time
            converters={'Date_start': pd.to_datetime,
                        'Date_end': pd.to_datetime},
            # missing pH values are written as '.' in the file
            na_values={'pH': '.'},
        )

        if by_station:
            stations = check_attributes(stations, self._stations, 'stations')
            df = self._select_sites(df, stations)
        elif by_stream:
            streams = check_attributes(streams, self.streams, 'streams')
            sites = self.sites()
            in_streams = sites['Stream_Name'].isin(streams)
            stations = sites.loc[in_streams, 'Site_ID'].values.tolist()
            df = self._select_sites(df, stations)

        if not self._is_all(parameters):
            df = df[parameters]

        return df

    def sites(self) -> pd.DataFrame:
        """returns the contents of sites.csv as a pandas dataframe"""
        fpath = os.path.join(self.path, 'sites.csv')
        # the identifier/name columns are explicitly read as strings
        return pd.read_csv(
            fpath,
            dtype={'Site_Name': str,
                   'Stream_Name': str,
                   'Basin_Region': str,
                   'Site_ID': str}
        )

    def fluxes(
            self,
            stations: Union[str, List[str]] = "all",
    ) -> pd.DataFrame:
        """
        returns fluxes data as a pandas dataframe

        Parameters
        ----------
        stations : Union[str, List[str]], optional
            station ID or list of station IDs, by default "all" which
            returns data of all the stations. Check `.stations()` method
            for available stations.
        """
        fpath = os.path.join(self.path, 'fluxes.csv')
        df = pd.read_csv(fpath)

        if not self._is_all(stations):
            stations = check_attributes(stations, self._stations, 'stations')
            df = self._select_sites(df, stations)

        return df

    def sources(self) -> pd.DataFrame:
        """returns the contents of sources.csv as a pandas dataframe"""
        fpath = os.path.join(self.path, 'sources.csv')
        return pd.read_csv(fpath)