Source code for aqua_fetch.wq._busan_beach


import os
from typing import Union

import pandas as pd


def busan_beach(
        inputs: Union[list, str, None] = None,
        target: Union[list, str, None] = 'tetx_coppml'
) -> pd.DataFrame:
    """
    Loads the Antibiotic resistance genes (ARG) data from a recreational beach
    in Busan, South Korea along with environment variables.

    The data is in the form of multivariate time series and was collected over
    the period of 2 years during several precipitation events. The frequency of
    environmental data is 30 mins while that of ARG is discontinuous. The data
    and its pre-processing is described in detail in
    `Jang et al., 2021 <https://doi.org/10.1016/j.watres.2021.117001>`_

    The csv file is read from the ``data`` directory which sits next to the
    package directory of this module. If the file is not there, it is
    downloaded once and written to that location so that later calls read the
    local copy.

    Arguments
    ---------
        inputs :
            features to use as input. A single feature name can be given as
            string. By default all environmental data is used which consists
            of following parameters

                - tide_cm
                - wat_temp_c
                - sal_psu
                - air_temp_c
                - pcp_mm
                - pcp3_mm
                - pcp6_mm
                - pcp12_mm
                - wind_dir_deg
                - wind_speed_mps
                - air_p_hpa
                - mslp_hpa
                - rel_hum

        target :
            feature/features to use as target/output. By default
            `tetx_coppml` is used as target. If the value is neither a string
            nor a list (e.g. ``None``), all columns of the file which are not
            among the default inputs are used as target.
            Logically one or more from following can be considered as target

                - ecoli
                - 16s
                - inti1
                - Total_args
                - tetx_coppml
                - sul1_coppml
                - blaTEM_coppml
                - aac_coppml
                - Total_otus
                - otu_5575
                - otu_273
                - otu_94

    Returns
    -------
    pd.DataFrame
        a pandas dataframe with inputs and target and indexed
        with pandas.DateTimeIndex

    Examples
    --------
        >>> from water_quality import busan_beach
        >>> dataframe = busan_beach()
        >>> dataframe.shape
        (1446, 14)
        >>> dataframe = busan_beach(target=['tetx_coppml', 'sul1_coppml'])
        >>> dataframe.shape
        (1446, 15)

    See usage `here <https://tabulight.readthedocs.io/en/latest/auto_examples/busan_beach.html>`_
    for more details.
    """
    path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
    # exist_ok avoids the check-then-create race when two processes
    # call this function for the first time simultaneously
    os.makedirs(path, exist_ok=True)

    fpath = os.path.join(path, "arg_busan.csv")
    if os.path.exists(fpath):
        df = pd.read_csv(fpath, index_col="index")
    else:
        df = pd.read_csv(
            "https://raw.githubusercontent.com/AtrCheema/AI4Water/ec2a4a426673b11e3589b64cef9d7160b1de28d4/ai4water/datasets/arg_busan.csv",
            index_col="index")
        # keep a local copy so that the next call does not need the network
        df.to_csv(fpath, index=True, index_label="index")

    df.index = pd.to_datetime(df.index)

    default_inputs = ['tide_cm', 'wat_temp_c', 'sal_psu', 'air_temp_c', 'pcp_mm', 'pcp3_mm', 'pcp6_mm',
                      'pcp12_mm', 'wind_dir_deg', 'wind_speed_mps', 'air_p_hpa', 'mslp_hpa', 'rel_hum'
                      ]
    # every column which is not an environmental input is a candidate target
    default_targets = [col for col in df.columns if col not in default_inputs]

    if inputs is None:
        inputs = default_inputs
    elif isinstance(inputs, str):
        # a bare string can not be concatenated with the target list below
        inputs = [inputs]
    else:
        inputs = list(inputs)

    if isinstance(target, str):
        target = [target]
    elif not isinstance(target, list):
        # anything else (e.g. None) means "all available targets"
        target = default_targets

    return df[inputs + target]