__all__ = ["GRQA"]
import os
from typing import Union, List
import numpy as np
import pandas as pd
from .._datasets import Datasets
from ..utils import check_st_en, check_attributes
DTYPES = {
'BOD5': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'BOD': {
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str, 'GEMSTAT_meta_Method_Description': str
},
'COD': {
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str, 'GEMSTAT_meta_Method_Description': str
},
'DIC': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'source': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'DIP': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
},
'DKN': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
},
'DOC': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'DON': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'DOSAT': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'NH4N': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
},
'NO2N': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'NO3N': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'pH': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'PN': {
'drainage_region_name': str,
'param_code': str, 'source_param_code': str,
'param_name': str, 'source_param_name': str,
'obs_value': np.float32,
'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'POC': {
'obs_id': str,
'obs_time_zone': str,
'site_id': str,
'site_name': str,
'site_country': str,
'upstream_basin_area': np.float32,
'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str,
'param_name': str, 'source_param_name': str,
'obs_value': np.float32,
'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'TAN': {
'upstream_basin_area': np.float32,
'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str,
'param_name': str, 'source_param_name': str,
'obs_value': np.float32,
'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
},
'TDN': {
'obs_id': str,
'obs_time_zone': str,
'site_id': str,
'site_name': str,
'site_country': str,
'upstream_basin_area': np.float32,
'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str,
'param_name': str, 'source_param_name': str,
'obs_value': np.float32,
'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'TDP': {
'obs_id': str,
'obs_time_zone': str,
'site_id': str,
'site_name': str,
'site_country': str,
'upstream_basin_area': np.float32,
'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str,
'param_name': str, 'source_param_name': str,
'obs_value': np.float32,
'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'TEMP': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'TIC': {
'upstream_basin_area': np.float32,
'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str,
'param_name': str, 'source_param_name': str,
'obs_value': np.float32,
'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'TIP': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
},
'TSS': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'TP': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'TON': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'TOC': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'TN': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
'WQP_meta_ResultAnalyticalMethod_MethodName': str,
'WQP_meta_ResultLaboratoryCommentText': str,
},
'TKN': {
'obs_id': str,
'obs_time_zone': str, 'site_id': str, 'site_name': str, 'site_country': str,
'upstream_basin_area': np.float32, 'upstream_basin_area_unit': str,
'drainage_region_name': str,
'param_code': str, 'source_param_code': str, 'param_name': str, 'source_param_name': str,
'obs_value': np.float32, 'source_obs_value': np.float32,
'detection_limit_flag': str,
'site_country': str,
'source_unit': str,
'filtration': str,
'obs_percentile': np.float32,
'site_ts_continuity': np.float32,
'GEMSTAT_meta_Station_Narrative': str,
'GEMSTAT_meta_Parameter_Description': str,
'GEMSTAT_meta_Analysis_Method_Code': str,
'GEMSTAT_meta_Method_Name': str,
'GEMSTAT_meta_Method_Description': str,
'GLORICH_meta_Value_remark_code': str,
'GLORICH_meta_Meaning': str,
'WATERBASE_meta_procedureAnalysedMatrix': str,
'WATERBASE_meta_Remarks': str,
},
}
[docs]
class GRQA(Datasets):
"""
Global River Water Quality Archive following the work of
`Virro et al., 2021 <https://essd.copernicus.org/articles/13/5483/2021/>`_ .
This dataset comprises of 42 parameters for 94955 sites across 116 countries.
Examples
--------
>>> from water_datasets import GRQA
>>> ds = GRQA(path="/mnt/datawaha/hyex/atr/data")
>>> ds.parameters
['TPP', 'PON', 'TEMP', 'TSS', ...]
>>> print(len(ds.parameters))
42
>>> len(ds.countries)
116
>>> len(ds.stations())
94955
>>> len(ds.parameters)
>>> coords = ds.stn_coords()
>>> coords.shape
(94955, 2)
>>> country = "Pakistan"
>>> len(ds.fetch_parameter('TEMP', country=country))
1324
>>> df = ds.fetch_parameter("TEMP", country=country)
>>> print(df.shape)
(1324, 38)
>>> df = ds.fetch_parameter("NH4N", country=country)
>>> print(df.shape)
(28, 36)
"""
url = 'https://zenodo.org/record/7056647#.YzBzDHZByUk'
[docs]
def __init__(
self,
download_source:bool = False,
path = None,
**kwargs):
"""
parameters
----------
download_source : bool
whether to download source data or not
"""
super().__init__(path=path, **kwargs)
files = ['GRQA_data_v1.3.zip', 'GRQA_meta.zip']
if download_source:
files += ['GRQA_source_data.zip']
self._download(include=files)
@property
def files(self):
return os.listdir(os.path.join(self.path, "GRQA_data_v1.3", "GRQA_data_v1.3"))
@property
def parameters(self):
return [f.split('_')[0] for f in self.files]
[docs]
def stations(self)->List[str]:
"""Returns names of stations/site_id"""
return self.sites_data().index.tolist()
@property
def countries(self)->List[str]:
return self.sites_data()['site_country'].dropna().unique().tolist()
[docs]
def fetch_parameter(
self,
parameter: str = "COD",
site_name: Union[List[str], str] = None,
country: Union[List[str], str] = None,
st:Union[int, str, pd.DatetimeIndex] = None,
en:Union[int, str, pd.DatetimeIndex] = None,
)->pd.DataFrame:
"""
parameters
----------
parameter : str, optional
name of parameter
site_name : str/list, optional
location for which data is to be fetched.
country : str/list optional (default=None)
st : str
starting date date or index
en : str
end date or index
Returns
-------
pd.DataFrame
a pandas dataframe
Example
--------
>>> from water_quality import GRQA
>>> dataset = GRQA()
>>> df = dataset.fetch_parameter()
fetch data for only one country
>>> cod_pak = dataset.fetch_parameter("COD", country="Pakistan")
fetch data for only one site
>>> cod_kotri = dataset.fetch_parameter("COD", site_name="Indus River - at Kotri")
we can find out the number of data points and sites available for a specific country as below
>>> for para in dataset.parameters:
>>> data = dataset.fetch_parameter(para, country="Germany")
>>> if len(data)>0:
>>> print(f"{para}, {df.shape}, {len(df['site_name'].unique())}")
"""
assert isinstance(parameter, str)
assert parameter in self.parameters
if isinstance(site_name, str):
site_name = [site_name]
if isinstance(country, str):
country = [country]
df = self._load_df(parameter)
if site_name is not None:
assert isinstance(site_name, list)
df = df[df['site_name'].isin(site_name)]
if country is not None:
assert isinstance(country, list)
df = df[df['site_country'].isin(country)]
df.index = pd.to_datetime(df.pop("obs_date") + " " + df.pop("obs_time"), errors='coerce')
return check_st_en(df, st, en)
def _load_df(self, parameter, **read_kws):
if hasattr(self, f"_load_{parameter}"):
return getattr(self, f"_load_{parameter}")()
fname = os.path.join(self.path, "GRQA_data_v1.3", "GRQA_data_v1.3", f"{parameter}_GRQA.csv")
if parameter in DTYPES:
return pd.read_csv(fname, sep=";", dtype=DTYPES[parameter], **read_kws)
return pd.read_csv(fname, sep=";", **read_kws)
def _load_DO(self):
# read_csv is causing mysterious errors
f = os.path.join(self.path, "GRQA_data_v1.3",
"GRQA_data_v1.3", f"DO_GRQA.csv")
lines = []
with open(f, 'r', encoding='utf-8') as fp:
for idx, line in enumerate(fp):
lines.append(line.split(';'))
return pd.DataFrame(lines[1:], columns=lines[0])
[docs]
def stn_coords(self):
"""
Returns the coordinates of all the stations in the dataset
Returns
-------
pd.DataFrame
A dataframe with columns 'lat', 'long'
"""
sites = self.sites_data()
return sites[['lat', 'long']].dropna().astype(np.float32)
[docs]
def sites_data(self)->pd.DataFrame:
"""
Returns the meta data for the dataset
"""
fpath = os.path.join(self.path, 'sites.csv')
if os.path.exists(fpath):
if self.verbosity:
print(f"loading from pre-existing{fpath}")
return pd.read_csv(fpath, index_col=0)
dfs = []
cols = ['lat_wgs84', 'lon_wgs84', 'site_name', 'site_country',
'upstream_basin_area', 'upstream_basin_area_unit']
for idx, para in enumerate(self.parameters):
df1 = self._load_df(para,
usecols = ['site_id'] + cols
).set_index('site_id')[cols]
duplicates = df1.index.duplicated(keep='first') # Keep the first occurrence, mark others as duplicate
# Drop duplicates based on the index
df2 = df1[~duplicates]
dfs.append(df2)
if self.verbosity> 1: print(idx, para )
df = pd.concat(dfs)
print(df.shape)
duplicates = df.index.duplicated(keep='first') # Keep the first occurrence, mark others as duplicate
# Drop duplicates based on the index
df = df[~duplicates]
df.rename(columns={'lat_wgs84': 'lat', 'lon_wgs84': 'long',
'upstream_basin_area': 'basin_area_km2',
'upstream_basin_area_unit': 'area_unit'}, inplace=True)
df.replace('', np.nan, inplace=True)
area_unit = df['area_unit']
area_unit = area_unit.replace('', np.nan).dropna().unique()
assert len(area_unit) == 1
assert area_unit[0] == 'sq mi'
# convert basin_area from sq mi to km2
df['basin_area_km2'] = df['basin_area_km2'].astype(np.float32) * 2.58999
df = df.drop(columns=['area_unit'])
df.to_csv(fpath, index=True)
return df